Linux 内核源代码情景分析(一)(中):https://developer.aliyun.com/article/1597929
(2)open_softirq
// kernel/softirq.c void open_softirq(int nr, void (*action)(struct softirq_action*), void *data) { unsigned long flags; int i; spin_lock_irqsave(&softirq_mask_lock, flags); softirq_vec[nr].data = data; softirq_vec[nr].action = action; for (i=0; i<NR_CPUS; i++) softirq_mask(i) |= (1<<nr); spin_unlock_irqrestore(&softirq_mask_lock, flags); }
内核中为软中断设置了一个以“软中断号”为下标的数组 softirq_vec[],类似于中断机制中的 irq_desc[] 。
// include/linux/interrupt.h /* softirq mask and active fields moved to irq_cpustat_t in * asm/hardirq.h to get better cache usage. KAO */ struct softirq_action { void (*action)(struct softirq_action *); void *data; };
数组 softirq_vec 是个全局量,系统中的各个 CPU 所看到的是同一个数组。但是,每个 CPU 各有其自己的“软中断控制/状况结构”,所以这些数据结构形成一个以 CPU 编号为下标的数组 irq_stat[]。这个数组也是全局量,但是各个 CPU 可以按其自身的编号访问相应的数据结构。我们把有关的定义列出于下,供读者自己阅读:
// include/asm-i386/hardirq.h /* entry.S is sensitive to the offsets of these fields */ typedef struct { unsigned int __softirq_active; unsigned int __softirq_mask; unsigned int __local_irq_count; unsigned int __local_bh_count; unsigned int __syscall_count; unsigned int __nmi_count; /* arch dependent */ } ____cacheline_aligned irq_cpustat_t; // =========================================================== // kernel/softirq.c irq_cpustat_t irq_stat[NR_CPUS]; struct tasklet_head tasklet_vec[NR_CPUS] __cacheline_aligned; struct tasklet_head tasklet_hi_vec[NR_CPUS] __cacheline_aligned; // =========================================================== // include/linux/irq_cpustat.h #ifdef CONFIG_SMP #define __IRQ_STAT(cpu, member) (irq_stat[cpu].member) #else #define __IRQ_STAT(cpu, member) ((void)(cpu), irq_stat[0].member) #endif /* arch independent irq_stat fields */ #define softirq_active(cpu) __IRQ_STAT((cpu), __softirq_active) #define softirq_mask(cpu) __IRQ_STAT((cpu), __softirq_mask)
(3)tasklet_schedule 和 tasklet_hi_schedule
应用参考 (1)tasklet
// include/linux/interrupt.h static inline void tasklet_schedule(struct tasklet_struct *t) { if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { int cpu = smp_processor_id(); unsigned long flags; local_irq_save(flags); t->next = tasklet_vec[cpu].list; tasklet_vec[cpu].list = t; __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); local_irq_restore(flags); } } static inline void tasklet_hi_schedule(struct tasklet_struct *t) { if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { int cpu = smp_processor_id(); unsigned long flags; local_irq_save(flags); t->next = tasklet_hi_vec[cpu].list; tasklet_hi_vec[cpu].list = t; __cpu_raise_softirq(cpu, HI_SOFTIRQ); local_irq_restore(flags); } }
这里的 smp_processor_id() 返回当前进程所在 CPU 的编号,然后以此为下标从 tasklet_hi_vec[] 中找到该 CPU 的队列头,把参数 t 所指的 tasklet_struct 数据结构链入这个队列。由此可见,对执行 bh 函数的要求是在哪一个 CPU 上提出的,就把它“调度”在哪一个 CPU 上执行,函数名中的 “schedule” 就是这个意思,而与“进程调度”毫无关系。另一方面,一个 tasklet_struct 代表着对 bh 函数的一次执行,在同一时间内只能把它链入一个队列中,而不可能同时出现在多个队列中。对于同一个 tasklet_struct 数据结构,如果已经对其调用了 tasklet_hi_schedule() ,而尚未得到执行,就不允许再将其链入队列, 所以在数据结构中设置了一个标志位 TASKLET_STATE_SCHED 来保证这一点。最后,还要通过 __cpu_raise_softirq() 正式发出软中断请求。
(4)do_softirq
内核每当在 do_IRQ() 中执行完一个通道中的中断服务程序以后,以及每当从系统调用返回时,都要检查是否有软中断请求在等待执行。下面是 do_IRQ() 中的一个片段:
// arch/i386/kernel/irq.c asmlinkage unsigned int do_IRQ(struct pt_regs regs) { // ... if (softirq_active(cpu) & softirq_mask(cpu)) do_softirq(); return 1; }
另一段代码取自 arch/i386/entry.S,这是在从系统调用返回时执行的:
// arch/i386/kernel/entry.S

/*
 * ret_from_sys_call (excerpt): test (softirq_active & softirq_mask) for the
 * current CPU and branch to handle_softirq when the result is non-zero.
 *
 * NOTE(review): this is an abridged quotation. As shown, the jne target is
 * the very next label, so the code between the branch and handle_softirq
 * has presumably been omitted -- check against the full entry.S.
 */
ENTRY(ret_from_sys_call)
#ifdef CONFIG_SMP
	/* Load the CPU number from the "processor" field at offset processor(%ebx). */
	movl processor(%ebx),%eax
	/* Scale the CPU number into a byte offset into irq_stat[];
	 * presumably one entry occupies 1 << CONFIG_X86_L1_CACHE_SHIFT bytes -- confirm. */
	shll $CONFIG_X86_L1_CACHE_SHIFT,%eax
	movl SYMBOL_NAME(irq_stat)(,%eax),%ecx		# softirq_active
	testl SYMBOL_NAME(irq_stat)+4(,%eax),%ecx	# softirq_mask
#else
	/* Non-SMP build: always use the entry at the start of irq_stat. */
	movl SYMBOL_NAME(irq_stat),%ecx		# softirq_active
	testl SYMBOL_NAME(irq_stat)+4,%ecx	# softirq_mask
#endif
	/* testl ANDs the two words; a non-zero result means a softirq is pending and unmasked. */
	jne   handle_softirq

handle_softirq:
	call SYMBOL_NAME(do_softirq)
	jmp  ret_from_intr
注意,这里的 processor 表示 task_struct 数据结构中该字段的位移,所以 movl processor(%ebx),%eax 这条指令(原书代码第 207 行)是从当前进程的 task_struct 数据结构中取当前 CPU 的编号。而 SYMBOL_NAME(irq_stat)(,%eax)则相当于 irq_stat[cpu], 并且是其中第一个字段;相应地,SYMBOL_NAME(irq_stat)+4(,%eax)相当于这个数据结构中的第二个字段,并且第一个字段必须是 32 位。读者不妨回过去看一下 irq_cpustat_t 的定义,在那里有个注释, 说 entry.S 中的代码对这个数据结构中的字段位置敏感,就是这个意思。所以,这些汇编代码实际上与上面 do_IRQ() 中的两行C代码是一样的。
检测到软中断请求以后,就要通过 do_softirq() 加以执行了。其代码在 kernel/softirq.c 中:
// kernel/softirq.c asmlinkage void do_softirq() { int cpu = smp_processor_id(); __u32 active, mask; if (in_interrupt()) return; local_bh_disable(); local_irq_disable(); mask = softirq_mask(cpu); active = softirq_active(cpu) & mask; if (active) { struct softirq_action *h; restart: /* Reset active bitmask before enabling irqs */ softirq_active(cpu) &= ~active; local_irq_enable(); h = softirq_vec; mask &= ~active; do { if (active & 1) h->action(h); h++; active >>= 1; } while (active); local_irq_disable(); active = softirq_active(cpu); if ((active &= mask) != 0) goto retry; } local_bh_enable(); /* Leave with locally disabled hard irqs. It is critical to close * window for infinite recursion, while we help local bh count, * it protected us. Now we are defenceless. */ return; retry: goto restart; }
软中断服务程序既不允许在一个硬中断服务程序内部执行,也不允许在一个软中断服务程序内部执行,所以要通过一个宏操作 in_interrupt() 加以检测,这是在 include/asm-i386/hardirq.h 中定义的。
在 2.4 版本中,通过 h->action 执行的服务程序有 bh_action()、tasklet_action()、tasklet_hi_action()。后续版本中 bh_action() 已被去掉。
(5)tasklet_action
// kernel/softirq.c static void tasklet_action(struct softirq_action *a) { int cpu = smp_processor_id(); struct tasklet_struct *list; local_irq_disable(); list = tasklet_vec[cpu].list; tasklet_vec[cpu].list = NULL; local_irq_enable(); while (list != NULL) { struct tasklet_struct *t = list; list = list->next; if (tasklet_trylock(t)) { if (atomic_read(&t->count) == 0) { clear_bit(TASKLET_STATE_SCHED, &t->state); t->func(t->data); /* * talklet_trylock() uses test_and_set_bit that imply * an mb when it returns zero, thus we need the explicit * mb only here: while closing the critical section. */ #ifdef CONFIG_SMP smp_mb__before_clear_bit(); #endif tasklet_unlock(t); continue; } tasklet_unlock(t); } local_irq_disable(); t->next = tasklet_vec[cpu].list; tasklet_vec[cpu].list = t; __cpu_raise_softirq(cpu, TASKLET_SOFTIRQ); local_irq_enable(); } }
应用参考 (1)tasklet
这里 t->func(t->data) 调用使用 DECLARE_TASKLET(name, function, data) 定义的 tasklet_struct 结构中的 function 函数。
// include/linux/interrupt.h /* Tasklets --- multithreaded analogue of BHs. Main feature differing them of generic softirqs: tasklet is running only on one CPU simultaneously. Main feature differing them of BHs: different tasklets may be run simultaneously on different CPUs. Properties: * If tasklet_schedule() is called, then tasklet is guaranteed to be executed on some cpu at least once after this. * If the tasklet is already scheduled, but its excecution is still not started, it will be executed only once. * If this tasklet is already running on another CPU (or schedule is called from tasklet itself), it is rescheduled for later. * Tasklet is strictly serialized wrt itself, but not wrt another tasklets. If client needs some intertask synchronization, he makes it with spinlocks. */ struct tasklet_struct { struct tasklet_struct *next; unsigned long state; atomic_t count; void (*func)(unsigned long); unsigned long data; }; #define DECLARE_TASKLET(name, func, data) \ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(0), func, data } #define DECLARE_TASKLET_DISABLED(name, func, data) \ struct tasklet_struct name = { NULL, 0, ATOMIC_INIT(1), func, data }