/*
 *      linux/kernel/softirq.c
 *
 *      Copyright (C) 1992 Linus Torvalds
 *
 *      Distribute under GPLv2.
 *
 *      Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 *
 *      Remote softirq infrastructure is by Jens Axboe.
 */
#include <linux/module.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/tick.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether it results in better locality.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */
#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
/*
 * We cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static inline void wakeup_softirqd(void)
{
        /* Interrupts are disabled: no need to stop preemption */
        struct task_struct *tsk = __get_cpu_var(ksoftirqd);

        if (tsk && tsk->state != TASK_RUNNING)
                wake_up_process(tsk);
}
/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip)
{
        unsigned long flags;

        WARN_ON_ONCE(in_irq());

        raw_local_irq_save(flags);
        add_preempt_count(SOFTIRQ_OFFSET);
        /*
         * Were softirqs turned off above:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_off(ip);
        raw_local_irq_restore(flags);
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip)
{
        add_preempt_count(SOFTIRQ_OFFSET);
        barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */
void local_bh_disable(void)
{
        __local_bh_disable((unsigned long)__builtin_return_address(0));
}

EXPORT_SYMBOL(local_bh_disable);
/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
        WARN_ON_ONCE(in_irq());
        WARN_ON_ONCE(!irqs_disabled());

        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on((unsigned long)__builtin_return_address(0));
        sub_preempt_count(SOFTIRQ_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);
static inline void _local_bh_enable_ip(unsigned long ip)
{
        WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_disable();
#endif
        /*
         * Are softirqs going to be turned on now:
         */
        if (softirq_count() == SOFTIRQ_OFFSET)
                trace_softirqs_on(ip);
        /*
         * Keep preemption disabled until we are done with
         * softirq processing:
         */
        sub_preempt_count(SOFTIRQ_OFFSET - 1);

        if (unlikely(!in_interrupt() && local_softirq_pending()))
                do_softirq();

        dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
        local_irq_enable();
#endif
        preempt_check_resched();
}

void local_bh_enable(void)
{
        _local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
        _local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);
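
/*
 * Example (illustrative only; the example_dev structure and
 * example_stats_update() below are hypothetical): process-context code
 * that shares data with a softirq handler brackets the access with
 * local_bh_disable()/local_bh_enable() so the softirq cannot run on this
 * CPU in between:
 *
 *      static void example_stats_update(struct example_dev *dev, int val)
 *      {
 *              local_bh_disable();
 *              dev->stats.last = val;
 *              dev->stats.count++;
 *              local_bh_enable();
 *      }
 */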
/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency against fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10
asmlinkage void __do_softirq(void)
{
        struct softirq_action *h;
        __u32 pending;
        int max_restart = MAX_SOFTIRQ_RESTART;
        int cpu;

        pending = local_softirq_pending();
        account_system_vtime(current);

        __local_bh_disable((unsigned long)__builtin_return_address(0));
        trace_softirq_enter();

        cpu = smp_processor_id();
restart:
        /* Reset the pending bitmask before enabling irqs */
        set_softirq_pending(0);

        local_irq_enable();

        h = softirq_vec;

        do {
                if (pending & 1) {
                        int prev_count = preempt_count();

                        h->action(h);

                        if (unlikely(prev_count != preempt_count())) {
                                printk(KERN_ERR "huh, entered softirq %td %p "
                                       "with preempt_count %08x, "
                                       "exited with %08x?\n", h - softirq_vec,
                                       h->action, prev_count, preempt_count());
                                preempt_count() = prev_count;
                        }

                        rcu_bh_qsctr_inc(cpu);
                }
                h++;
                pending >>= 1;
        } while (pending);

        local_irq_disable();

        pending = local_softirq_pending();
        if (pending && --max_restart)
                goto restart;

        if (pending)
                wakeup_softirqd();

        trace_softirq_exit();

        account_system_vtime(current);
        _local_bh_enable();
}
#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
        __u32 pending;
        unsigned long flags;

        if (in_interrupt())
                return;

        local_irq_save(flags);

        pending = local_softirq_pending();

        if (pending)
                __do_softirq();

        local_irq_restore(flags);
}

#endif
/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
        int cpu = smp_processor_id();

        if (idle_cpu(cpu) && !in_interrupt()) {
                __irq_enter();
                tick_check_idle(cpu);
        } else
                __irq_enter();
}

#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
# define invoke_softirq()       __do_softirq()
#else
# define invoke_softirq()       do_softirq()
#endif
/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
        account_system_vtime(current);
        trace_hardirq_exit();
        sub_preempt_count(IRQ_EXIT_OFFSET);
        if (!in_interrupt() && local_softirq_pending())
                invoke_softirq();

#ifdef CONFIG_NO_HZ
        /* Make sure that timer wheel updates are propagated */
        if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
                tick_nohz_stop_sched_tick(0);
#endif
        preempt_enable_no_resched();
}
/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
        __raise_softirq_irqoff(nr);

        /*
         * If we're in an interrupt or softirq, we're done
         * (this also catches softirq-disabled code). We will
         * actually run the softirq once we return from
         * the irq or softirq.
         *
         * Otherwise we wake up ksoftirqd to make sure we
         * schedule the softirq soon.
         */
        if (!in_interrupt())
                wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
        unsigned long flags;

        local_irq_save(flags);
        raise_softirq_irqoff(nr);
        local_irq_restore(flags);
}
void open_softirq(int nr, void (*action)(struct softirq_action *))
{
        softirq_vec[nr].action = action;
}
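
/*
 * Example (illustrative only; EXAMPLE_SOFTIRQ, example_action() and
 * example_init() are hypothetical): a subsystem installs its handler once
 * at init time with open_softirq() and later marks it pending with
 * raise_softirq(), which also wakes ksoftirqd when called from process
 * context:
 *
 *      static void example_action(struct softirq_action *h)
 *      {
 *              pr_debug("example softirq ran on CPU %d\n", smp_processor_id());
 *      }
 *
 *      static int __init example_init(void)
 *      {
 *              open_softirq(EXAMPLE_SOFTIRQ, example_action);
 *              raise_softirq(EXAMPLE_SOFTIRQ);
 *              return 0;
 *      }
 */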
/* Tasklets */
struct tasklet_head
{
        struct tasklet_struct *head;
        struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);
void __tasklet_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = NULL;
        *__get_cpu_var(tasklet_vec).tail = t;
        __get_cpu_var(tasklet_vec).tail = &(t->next);
        raise_softirq_irqoff(TASKLET_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);
void __tasklet_hi_schedule(struct tasklet_struct *t)
{
        unsigned long flags;

        local_irq_save(flags);
        t->next = NULL;
        *__get_cpu_var(tasklet_hi_vec).tail = t;
        __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
        raise_softirq_irqoff(HI_SOFTIRQ);
        local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);
static void tasklet_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_vec).head;
        __get_cpu_var(tasklet_vec).head = NULL;
        __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = NULL;
                *__get_cpu_var(tasklet_vec).tail = t;
                __get_cpu_var(tasklet_vec).tail = &(t->next);
                __raise_softirq_irqoff(TASKLET_SOFTIRQ);
                local_irq_enable();
        }
}
static void tasklet_hi_action(struct softirq_action *a)
{
        struct tasklet_struct *list;

        local_irq_disable();
        list = __get_cpu_var(tasklet_hi_vec).head;
        __get_cpu_var(tasklet_hi_vec).head = NULL;
        __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head;
        local_irq_enable();

        while (list) {
                struct tasklet_struct *t = list;

                list = list->next;

                if (tasklet_trylock(t)) {
                        if (!atomic_read(&t->count)) {
                                if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
                                        BUG();
                                t->func(t->data);
                                tasklet_unlock(t);
                                continue;
                        }
                        tasklet_unlock(t);
                }

                local_irq_disable();
                t->next = NULL;
                *__get_cpu_var(tasklet_hi_vec).tail = t;
                __get_cpu_var(tasklet_hi_vec).tail = &(t->next);
                __raise_softirq_irqoff(HI_SOFTIRQ);
                local_irq_enable();
        }
}
void tasklet_init(struct tasklet_struct *t,
                  void (*func)(unsigned long), unsigned long data)
{
        t->next = NULL;
        t->state = 0;
        atomic_set(&t->count, 0);
        t->func = func;
        t->data = data;
}

EXPORT_SYMBOL(tasklet_init);
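
/*
 * Example (illustrative only; example_dev, example_tasklet_func() and
 * example_process_completions() are hypothetical): a driver initializes a
 * tasklet once, schedules it from its hard interrupt handler, and the
 * function then runs in softirq context via tasklet_action() above:
 *
 *      static void example_tasklet_func(unsigned long data)
 *      {
 *              struct example_dev *dev = (struct example_dev *)data;
 *
 *              example_process_completions(dev);
 *      }
 *
 *      tasklet_init(&dev->tasklet, example_tasklet_func, (unsigned long)dev);
 *      tasklet_schedule(&dev->tasklet);
 */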
void tasklet_kill(struct tasklet_struct *t)
{
        if (in_interrupt())
                printk("Attempt to kill tasklet from interrupt\n");

        while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
                do
                        yield();
                while (test_bit(TASKLET_STATE_SCHED, &t->state));
        }
        tasklet_unlock_wait(t);
        clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);
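
/*
 * Example (illustrative only; dev, dev->irq and dev->tasklet are
 * hypothetical): on teardown the interrupt source is quiesced first, then
 * tasklet_kill() waits out any scheduled or running instance before the
 * backing data is freed:
 *
 *      free_irq(dev->irq, dev);
 *      tasklet_kill(&dev->tasklet);
 *      kfree(dev);
 */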
DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);

static void __local_trigger(struct call_single_data *cp, int softirq)
{
        struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);

        list_add_tail(&cp->list, head);

        /* Trigger the softirq only if the list was previously empty. */
        if (head->next == &cp->list)
                raise_softirq_irqoff(softirq);
}
#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
static void remote_softirq_receive(void *data)
{
        struct call_single_data *cp = data;
        unsigned long flags;
        int softirq;

        softirq = cp->priv;

        local_irq_save(flags);
        __local_trigger(cp, softirq);
        local_irq_restore(flags);
}

static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
        if (cpu_online(cpu)) {
                cp->func = remote_softirq_receive;
                cp->info = cp;
                cp->flags = 0;
                cp->priv = softirq;

                __smp_call_function_single(cpu, cp);
                return 0;
        }
        return 1;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
        return 1;
}
#endif /* CONFIG_USE_GENERIC_SMP_HELPERS */
/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu. If this cannot be
 * done, the work is instead queued up on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
        if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
                __local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);
/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
        unsigned long flags;
        int this_cpu;

        local_irq_save(flags);
        this_cpu = smp_processor_id();
        __send_remote_softirq(cp, cpu, this_cpu, softirq);
        local_irq_restore(flags);
}
EXPORT_SYMBOL(send_remote_softirq);
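
/*
 * Example (illustrative only; struct example_work, example_complete() and
 * EXAMPLE_SOFTIRQ are hypothetical): completion work is pushed back to the
 * CPU that submitted it by embedding a call_single_data area in the work
 * item and handing it to send_remote_softirq(); if the target CPU is
 * offline, or is the local CPU, the item simply lands on the local
 * per-CPU softirq_work_list:
 *
 *      struct example_work {
 *              struct call_single_data csd;
 *              int submitting_cpu;
 *      };
 *
 *      static void example_complete(struct example_work *work)
 *      {
 *              send_remote_softirq(&work->csd, work->submitting_cpu,
 *                                  EXAMPLE_SOFTIRQ);
 *      }
 */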
static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
                                               unsigned long action, void *hcpu)
{
        /*
         * If a CPU goes away, splice its entries to the current CPU
         * and trigger a run of the softirq
         */
        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
                int cpu = (unsigned long) hcpu;
                int i;

                local_irq_disable();
                for (i = 0; i < NR_SOFTIRQS; i++) {
                        struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
                        struct list_head *local_head;

                        if (list_empty(head))
                                continue;

                        local_head = &__get_cpu_var(softirq_work_list[i]);
                        list_splice_init(head, local_head);
                        raise_softirq_irqoff(i);
                }
                local_irq_enable();
        }

        return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
        .notifier_call  = remote_softirq_cpu_notify,
};
void __init softirq_init(void)
{
        int cpu;

        for_each_possible_cpu(cpu) {
                int i;

                per_cpu(tasklet_vec, cpu).tail =
                        &per_cpu(tasklet_vec, cpu).head;
                per_cpu(tasklet_hi_vec, cpu).tail =
                        &per_cpu(tasklet_hi_vec, cpu).head;
                for (i = 0; i < NR_SOFTIRQS; i++)
                        INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
        }

        register_hotcpu_notifier(&remote_softirq_cpu_notifier);

        open_softirq(TASKLET_SOFTIRQ, tasklet_action);
        open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}
static int ksoftirqd(void * __bind_cpu)
{
        set_current_state(TASK_INTERRUPTIBLE);

        while (!kthread_should_stop()) {
                preempt_disable();
                if (!local_softirq_pending()) {
                        preempt_enable_no_resched();
                        schedule();
                        preempt_disable();
                }

                __set_current_state(TASK_RUNNING);

                while (local_softirq_pending()) {
                        /* Preempt disable stops cpu going offline.
                           If already offline, we'll be on wrong CPU:
                           don't process */
                        if (cpu_is_offline((long)__bind_cpu))
                                goto wait_to_die;
                        do_softirq();
                        preempt_enable_no_resched();
                        cond_resched();
                        preempt_disable();
                }
                preempt_enable();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;

wait_to_die:
        preempt_enable();
        /* Wait for kthread_stop */
        set_current_state(TASK_INTERRUPTIBLE);
        while (!kthread_should_stop()) {
                schedule();
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
        return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
        struct tasklet_struct **i;

        BUG_ON(cpu_online(cpu));
        BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

        if (!test_bit(TASKLET_STATE_SCHED, &t->state))
                return;

        /* CPU is dead, so no lock needed. */
        for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
                if (*i == t) {
                        *i = t->next;
                        /* If this was the tail element, move the tail ptr */
                        if (*i == NULL)
                                per_cpu(tasklet_vec, cpu).tail = i;
                        return;
                }
        }
        BUG();
}
static void takeover_tasklets(unsigned int cpu)
{
        /* CPU is dead, so no lock needed. */
        local_irq_disable();

        /* Find end, append list for that CPU. */
        if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
                *(__get_cpu_var(tasklet_vec).tail) = per_cpu(tasklet_vec, cpu).head;
                __get_cpu_var(tasklet_vec).tail = per_cpu(tasklet_vec, cpu).tail;
                per_cpu(tasklet_vec, cpu).head = NULL;
                per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
        }
        raise_softirq_irqoff(TASKLET_SOFTIRQ);

        if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
                *__get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).head;
                __get_cpu_var(tasklet_hi_vec).tail = per_cpu(tasklet_hi_vec, cpu).tail;
                per_cpu(tasklet_hi_vec, cpu).head = NULL;
                per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
        }
        raise_softirq_irqoff(HI_SOFTIRQ);

        local_irq_enable();
}

#endif /* CONFIG_HOTPLUG_CPU */
static int __cpuinit cpu_callback(struct notifier_block *nfb,
                                  unsigned long action,
                                  void *hcpu)
{
        int hotcpu = (unsigned long)hcpu;
        struct task_struct *p;

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
                if (IS_ERR(p)) {
                        printk("ksoftirqd for %i failed\n", hotcpu);
                        return NOTIFY_BAD;
                }
                kthread_bind(p, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = p;
                break;
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
                wake_up_process(per_cpu(ksoftirqd, hotcpu));
                break;
#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:
                if (!per_cpu(ksoftirqd, hotcpu))
                        break;
                /* Unbind so it can run. Fall thru. */
                kthread_bind(per_cpu(ksoftirqd, hotcpu),
                             any_online_cpu(cpu_online_map));
        case CPU_DEAD:
        case CPU_DEAD_FROZEN: {
                struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };

                p = per_cpu(ksoftirqd, hotcpu);
                per_cpu(ksoftirqd, hotcpu) = NULL;
                sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
                kthread_stop(p);
                takeover_tasklets(hotcpu);
                break;
        }
#endif /* CONFIG_HOTPLUG_CPU */
        }
        return NOTIFY_OK;
}
static struct notifier_block __cpuinitdata cpu_nfb = {
        .notifier_call = cpu_callback
};

static __init int spawn_ksoftirqd(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

        BUG_ON(err == NOTIFY_BAD);
        cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
        register_cpu_notifier(&cpu_nfb);
        return 0;
}
early_initcall(spawn_ksoftirqd);
#ifdef CONFIG_SMP
/*
 * Call a function on all processors
 */
int on_each_cpu(void (*func) (void *info), void *info, int wait)
{
        int ret = 0;

        preempt_disable();
        ret = smp_call_function(func, info, wait);
        local_irq_disable();
        func(info);
        local_irq_enable();
        preempt_enable();
        return ret;
}
EXPORT_SYMBOL(on_each_cpu);
#endif
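
/*
 * Example (illustrative only; example_flush_local() is hypothetical):
 * callers use on_each_cpu() to run a short, atomic helper on every online
 * CPU, including the current one, and wait for it to finish:
 *
 *      static void example_flush_local(void *info)
 *      {
 *              pr_debug("flush on CPU %d\n", smp_processor_id());
 *      }
 *
 *      on_each_cpu(example_flush_local, NULL, 1);
 */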