Hold siglock across signal dequeue and stop handling in kernel/signal.c.

get_signal_to_deliver() now takes current->sighand->siglock once at the
new "relock" label and keeps it held across dequeue_signal(), the ptrace
group-stop bookkeeping and the requeue of a newly blocked signal, rather
than taking and dropping it around each step.  It is dropped before the
debugger is notified, before the fatal-signal path, and before returning.

do_signal_stop() is now entered with siglock already held by the caller
and releases it on its own paths.

Where siglock has to be dropped (to take tasklist_lock first when a
group stop is initiated, and around is_orphaned_pgrp()), a signal can be
posted in the unlocked window.  The new sig_avoid_stop_race() macro tests
the private and shared pending sets for SIGCONT or SIGKILL, and the stop
is abandoned when either is found.

NOTE(review): line breaks, indentation and comment spacing in this copy
were reconstructed from a whitespace-collapsed paste; hunk line counts
were re-checked against the @@ headers, but confirm with
"patch --dry-run" against the intended tree before applying.

diff -urN linux-2.4.20/kernel/signal.c linux-2.4.20-dj/kernel/signal.c
--- linux-2.4.20/kernel/signal.c	2003-08-07 15:21:39.000000000 +0100
+++ linux-2.4.20-dj/kernel/signal.c	2003-08-07 18:25:12.000000000 +0100
@@ -148,6 +148,11 @@
 	(!T(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \
 	 (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL)
 
+#define sig_avoid_stop_race() \
+	(sigtestsetmask(&current->pending.signal, M(SIGCONT) | M(SIGKILL)) || \
+	 sigtestsetmask(&current->signal->shared_pending.signal, \
+						  M(SIGCONT) | M(SIGKILL)))
+
 /*
  * Re-calculate pending state from the set of locally pending
  * signals, globally pending signals, and blocked signals.
@@ -1346,16 +1351,13 @@
 	struct sighand_struct *sighand = current->sighand;
 	int stop_count = -1;
 
+	/* spin_lock_irq(&sighand->siglock) is now done in caller */
+
 	if (sig->group_stop_count > 0) {
 		/*
 		 * There is a group stop in progress.  We don't need to
 		 * start another one.
 		 */
-		spin_lock_irq(&sighand->siglock);
-		if (unlikely(sig->group_stop_count == 0)) {
-			spin_unlock_irq(&sighand->siglock);
-			return;
-		}
 		signr = sig->group_exit_code;
 		stop_count = --sig->group_stop_count;
 		current->exit_code = signr;
@@ -1364,17 +1366,27 @@
 	}
 	else if (thread_group_empty(current)) {
 		/*
-		 * No locks needed in this case.
+		 * Lock must be held through transition to stopped state.
 		 */
 		current->exit_code = signr;
 		set_current_state(TASK_STOPPED);
+		spin_unlock_irq(&sighand->siglock);
 	}
 	else {
 		/*
 		 * There is no group stop already in progress.
-		 * We must initiate one now.
+		 * We must initiate one now, but that requires
+		 * dropping siglock to get both the tasklist lock
+		 * and siglock again in the proper order.  Note that
+		 * this allows an intervening SIGCONT to be posted.
+		 * We need to check for that and bail out if necessary.
 		 */
 		struct task_struct *t;
+
+		spin_unlock_irq(&sighand->siglock);
+
+		/* signals can be posted during this window */
+
 		read_lock(&tasklist_lock);
 		spin_lock_irq(&sighand->siglock);
 
@@ -1389,6 +1401,16 @@
 			return;
 		}
 
+		if (unlikely(sig_avoid_stop_race())) {
+			/*
+			 * Either a SIGCONT or a SIGKILL signal was
+			 * posted in the siglock-not-held window.
+			 */
+			spin_unlock_irq(&sighand->siglock);
+			read_unlock(&tasklist_lock);
+			return;
+		}
+
 		if (sig->group_stop_count == 0) {
 			sig->group_exit_code = signr;
 			stop_count = 0;
@@ -1427,12 +1449,13 @@
 int get_signal_to_deliver(siginfo_t *info, struct pt_regs *regs)
 {
 	sigset_t *mask = &current->blocked;
+	int signr = 0;
 
+relock:
+	spin_lock_irq(&current->sighand->siglock);
 	for (;;) {
-		unsigned long signr = 0;
 		struct k_sigaction *ka;
 
-		spin_lock_irq(&current->sighand->siglock);
 		if (unlikely(current->signal->group_stop_count > 0)) {
 			int stop_count;
 			if (current->signal->group_exit_task == current) {
@@ -1455,39 +1478,38 @@
 			set_current_state(TASK_STOPPED);
 			spin_unlock_irq(&current->sighand->siglock);
 			finish_stop(stop_count);
-			continue;
+			goto relock;
 		}
 	dequeue:
 		signr = dequeue_signal(mask, info);
-		spin_unlock_irq(&current->sighand->siglock);
 
 		if (!signr)
-			break;
+			break; /* will return 0 */
 
 		if ((current->ptrace & PT_PTRACED) && signr != SIGKILL) {
 			/*
 			 * If there is a group stop in progress,
 			 * we must participate in the bookkeeping.
 			 */
-			if (current->signal->group_stop_count > 0) {
-				spin_lock_irq(&current->sighand->siglock);
+			if (current->signal->group_stop_count > 0)
 				--current->signal->group_stop_count;
-				spin_unlock_irq(&current->sighand->siglock);
-			}
 
 			/* Let the debugger run.  */
 			current->exit_code = signr;
 			current->last_siginfo = info;
 			set_current_state(TASK_STOPPED);
+			spin_unlock_irq(&current->sighand->siglock);
 			notify_parent(current, SIGCHLD);
 			schedule();
 
 			current->last_siginfo = NULL;
 
 			/* We're back.  Did the debugger cancel the sig?  */
+			spin_lock_irq(&current->sighand->siglock);
 			signr = current->exit_code;
 			if (signr == 0)
 				continue;
+
 			current->exit_code = 0;
 
 			/* Update the siginfo structure if the signal has
@@ -1504,9 +1526,7 @@
 
 			/* If the (new) signal is now blocked, requeue it.  */
 			if (sigismember(&current->blocked, signr)) {
-				spin_lock_irq(&current->sighand->siglock);
 				specific_send_sig_info(signr, info, current);
-				spin_unlock_irq(&current->sighand->siglock);
 				continue;
 			}
 		}
@@ -1515,7 +1535,7 @@
 		if (ka->sa.sa_handler == SIG_IGN) /* Do nothing.  */
 			continue;
 		if (ka->sa.sa_handler != SIG_DFL) /* Run the handler.  */
-			return signr;
+			break; /* will return non-zero "signr" value */
 
 		/*
 		 * Now we are doing the default action for this signal.
@@ -1532,20 +1552,44 @@
 			 * The default action is to stop all threads in
 			 * the thread group.  The job control signals
 			 * do nothing in an orphaned pgrp, but SIGSTOP
-			 * always works.
+			 * always works.  Note that siglock needs to be
+			 * dropped during the call to is_orphaned_pgrp()
+			 * because of lock ordering with tasklist_lock.
+			 * This allows an intervening SIGCONT to be posted.
+			 * We need to check for that and bail out if necessary.
 			 */
-			if (signr == SIGSTOP ||
-			    !is_orphaned_pgrp(current->pgrp))
-				do_signal_stop(signr);
-			continue;
+			if (signr == SIGSTOP) {
+				do_signal_stop(signr); /* releases siglock */
+				goto relock;
+			}
+			spin_unlock_irq(&current->sighand->siglock);
+
+			/* signals can be posted during this window */
+
+			if (is_orphaned_pgrp(current->pgrp))
+				goto relock;
+
+			spin_lock_irq(&current->sighand->siglock);
+			if (unlikely(sig_avoid_stop_race())) {
+				/*
+				 * Either a SIGCONT or a SIGKILL signal was
+				 * posted in the siglock-not-held window.
+				 */
+				continue;
+			}
+
+			do_signal_stop(signr); /* releases siglock */
+			goto relock;
 		}
 
+		spin_unlock_irq(&current->sighand->siglock);
+
 		/*
 		 * Anything else is fatal, maybe with a core dump.
 		 */
 		current->flags |= PF_SIGNALED;
 		if (sig_kernel_coredump(signr) &&
-		    do_coredump(signr, signr, regs)) {
+		    do_coredump((long)signr, signr, regs)) {
 			/*
 			 * That killed all other threads in the group and
 			 * synchronized with their demise, so there can't
@@ -1559,8 +1603,8 @@
 			BUG_ON(!current->signal->group_exit);
 			BUG_ON(current->signal->group_exit_code != code);
 			do_exit(code);
-			/* NOTREACHED */
-		}
+				/* NOTREACHED */
+			}
 
 		/*
 		 * Death signals, no core dump.
@@ -1568,7 +1612,8 @@
 		do_group_exit(signr);
 		/* NOTREACHED */
 	}
-	return 0;
+	spin_unlock_irq(&current->sighand->siglock);
+	return signr;
 }
 
 #endif