diff -urN linux-2.4.20/kernel/sched.c linux/kernel/sched.c
--- linux-2.4.20/kernel/sched.c	2003-02-17 16:39:11.000000000 +0100
+++ linux/kernel/sched.c	2003-02-17 16:37:17.000000000 +0100
@@ -157,6 +157,8 @@
 	task_t *migration_thread;
 	struct list_head migration_queue;
 
+	unsigned long soft_lockup_timestamp;	/* jiffies at the last kwatchdog wakeup */
+	unsigned long soft_lockup_message;	/* nonzero once a lockup has been reported */
 	atomic_t nr_iowait;
 } ____cacheline_aligned;
 
@@ -2099,6 +2101,73 @@
 
 #endif
 
+/*
+ * Called from do_timer() on every tick.  Warns when this CPU's
+ * runqueue timestamp, which kdog_thread() refreshes, is more than
+ * 180 seconds behind jiffies.  soft_lockup_message latches so the
+ * warning is printed once per stall and is cleared again as soon as
+ * the timestamp is fresh.
+ */
+void check_softlockup(struct pt_regs *regs)
+{
+	runqueue_t *rq = this_rq();
+
+	if (time_before(rq->soft_lockup_timestamp + 180*HZ, jiffies)) {
+		if (!rq->soft_lockup_message) {
+			rq->soft_lockup_message = 1;
+			printk(KERN_CRIT "kwatchdog detected soft-lockup: kernel looping for more than 180 seconds!\n");
+			show_regs(regs);
+		}
+	} else
+		rq->soft_lockup_message = 0;
+}
+
+/*
+ * Per-CPU watchdog thread; the CPU number is passed in through data.
+ * It runs as SCHED_FIFO at priority MAX_RT_PRIO-1 and, about once a
+ * second, stamps the runqueue it is running on with the current jiffies.
+ */
+static int kdog_thread(void * data)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+	int cpu = (long) data;
+
+	daemonize();
+	sigfillset(&current->blocked);
+	set_fs(KERNEL_DS);
+
+#if CONFIG_SMP
+	/*
+	 * Either we are running on the right CPU, or there's
+	 * a migration thread on the target CPU, guaranteed.
+	 */
+	set_cpus_allowed(current, 1UL << cpu);
+#endif
+	setscheduler(0, SCHED_FIFO, &param);
+
+	sprintf(current->comm, "kwatchdog/%d", cpu);
+
+	for (;;) {
+		current->state = TASK_INTERRUPTIBLE;
+		schedule_timeout(HZ);
+		this_rq()->soft_lockup_timestamp = jiffies;
+	}
+}
+
+/* Boot-time initcall: start one kwatchdog thread per CPU. */
+__init int spawn_kdogd(void)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < smp_num_cpus; cpu++)
+		if (kernel_thread(kdog_thread, (void *)(long)cpu, CLONE_KERNEL) < 0)
+			BUG();
+
+	return 0;
+}
+
+__initcall(spawn_kdogd);
+
 extern void init_timervecs(void);
 extern void timer_bh(void);
 
diff -urN linux-2.4.20/kernel/timer.c linux/kernel/timer.c
--- linux-2.4.20/kernel/timer.c	2003-02-17 16:39:11.000000000 +0100
+++ linux/kernel/timer.c	2003-02-17 16:37:17.000000000 +0100
@@ -668,9 +668,12 @@
 		run_timer_list();
 }
 
+extern void check_softlockup(struct pt_regs *regs);
+
 void do_timer(struct pt_regs *regs)
 {
 	(*(unsigned long *)&jiffies)++;
+	check_softlockup(regs);
 #ifndef CONFIG_SMP
 	/* SMP process accounting uses the local APIC timer */