Softirq execution -> after Bottom-half enable
void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
{
    /*
     * Warn if interrupts are already disabled when this is called.
     * Disabling interrupts is a stronger guarantee than disabling the
     * Bottom-half, so once interrupts are off there is no point in
     * managing the Bottom-half separately.
     */
    WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
    local_irq_disable();
#endif
    /*
     * Are softirqs going to be turned on now:
     */
    if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
        trace_softirqs_on(ip);
    /*
     * Keep preemption disabled until we are done with
     * softirq processing:
     *
     * preempt_count_sub() pairs with preempt_count_add(); both operate
     * on thread_info->preempt_count with the same value. Subtracting
     * cnt - 1 first leaves preempt_count non-zero, so preemption
     * remains disabled.
     */
    preempt_count_sub(cnt - 1);

    if (unlikely(!in_interrupt() && local_softirq_pending())) {
        /*
         * While the Bottom-half was disabled, softirqs raised by
         * incoming interrupts were not processed; when process context
         * re-enables the Bottom-half here, check for pending softirq
         * requests.
         *
         * Run softirq if any pending. And do it in its own stack
         * as we may be calling this deep in a task call stack already.
         */
        do_softirq();
    }

    preempt_count_dec();    /* subtract the final 1, for a total of cnt */
#ifdef CONFIG_TRACE_IRQFLAGS
    local_irq_enable();
#endif
    preempt_check_resched();    /* check for a pending reschedule request */
}
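This pairing is what process-context code relies on when it shares data with a softirq handler. A minimal usage sketch, assuming a hypothetical per-CPU counter (stats_count and update_stats are illustrative names, not from the code above):

#include <linux/bottom_half.h>
#include <linux/percpu.h>

/* Hypothetical per-CPU counter shared with a softirq handler. */
static DEFINE_PER_CPU(unsigned long, stats_count);

static void update_stats(void)
{
    /* Close the Bottom-half so a softirq on this CPU cannot run
     * in the middle of the update. */
    local_bh_disable();
    __this_cpu_inc(stats_count);
    /* Re-enabling goes through __local_bh_enable_ip(): any softirq
     * raised while the Bottom-half was closed is processed here. */
    local_bh_enable();
}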
tasklet
A tasklet is one type of softirq. However, softirq types are all statically allocated in the kernel and cannot be allocated dynamically, while tasklets support both static and dynamic allocation, which makes them convenient to extend in driver code. Softirqs can run in parallel on multiple CPUs, so their handlers must be written with reentrancy in mind; a tasklet is bound to one CPU while it runs and unbound when it finishes, so its handler need not be reentrant, though this costs some performance.
static inline void tasklet_schedule(struct tasklet_struct *t)
{
    /* test and set the tasklet's SCHED state bit */
    if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state))
        __tasklet_schedule(t);
}
void __tasklet_schedule(struct tasklet_struct *t)
{
    unsigned long flags;

    local_irq_save(flags);    /* disable local interrupts */
    t->next = NULL;
    /* append the tasklet to this CPU's tasklet_vec list */
    *__this_cpu_read(tasklet_vec.tail) = t;
    __this_cpu_write(tasklet_vec.tail, &(t->next));
    raise_softirq_irqoff(TASKLET_SOFTIRQ);    /* trigger softirq processing */
    local_irq_restore(flags);
}
In essence, a tasklet is just a kind of softirq.
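A typical driver usage, sketched against the pre-5.9 tasklet API that matches the code above (my_dev_tasklet, my_dev_tasklet_fn and my_dev_isr are illustrative names):

#include <linux/interrupt.h>

/* Bottom-half handler: runs in softirq context on the CPU that
 * scheduled it, so it must not sleep. */
static void my_dev_tasklet_fn(unsigned long data)
{
    /* time-consuming part of the interrupt handling */
}

static DECLARE_TASKLET(my_dev_tasklet, my_dev_tasklet_fn, 0);

static irqreturn_t my_dev_isr(int irq, void *dev_id)
{
    /* top half: acknowledge the hardware, then defer the rest
     * through the tasklet_schedule() path shown above */
    tasklet_schedule(&my_dev_tasklet);
    return IRQ_HANDLED;
}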
Workqueues
A workqueue is a general mechanism that uses kernel threads to execute work items asynchronously. A workqueue can serve as the Bottom-half mechanism for interrupt handling, using process context to run the time-consuming parts of interrupt processing; it is therefore allowed to sleep, whereas Softirq and Tasklet handlers must not sleep.
struct work_struct {
    atomic_long_t data;        /* low bits hold status flags; high bits hold
                                  the worker_pool ID or a pool_workqueue pointer */
    struct list_head entry;    /* for linking the work onto lists */
    work_func_t func;          /* work handler, called back from a kernel thread */
#ifdef CONFIG_LOCKDEP
    struct lockdep_map lockdep_map;
#endif
};
To defer an operation through a workqueue, a driver creates a struct work_struct object (a work item) and submits it to a workqueue with queue_work().
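A minimal sketch of that pattern (my_work, my_work_fn and my_dev_isr are illustrative names; system_wq is the kernel's default workqueue):

#include <linux/interrupt.h>
#include <linux/workqueue.h>

/* Work handler: runs in a worker kernel thread, i.e. process
 * context, so it may sleep. */
static void my_work_fn(struct work_struct *work)
{
    /* time-consuming part of the interrupt handling */
}

static DECLARE_WORK(my_work, my_work_fn);

static irqreturn_t my_dev_isr(int irq, void *dev_id)
{
    /* defer the slow part to the default workqueue; this is
     * equivalent to schedule_work(&my_work) */
    queue_work(system_wq, &my_work);
    return IRQ_HANDLED;
}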
struct workqueue_struct {
    struct list_head pwqs;        /* WR: all pwqs of this wq (list of its pool_workqueues) */
    struct list_head list;        /* PL: list of all workqueues (links into the global list) */

    struct mutex mutex;           /* protects this wq */
    int work_color;               /* WQ: current work color */
    int flush_color;              /* WQ: current flush color */
    atomic_t nr_pwqs_to_flush;    /* flush in progress */
    struct wq_flusher *first_flusher;     /* WQ: first flusher */
    struct list_head flusher_queue;       /* WQ: flush waiters */
    struct list_head flusher_overflow;    /* WQ: flush overflow list */

    struct list_head maydays;     /* MD: pwqs requesting rescue (pool_workqueues
                                     in rescue state are added to this list) */
    struct worker *rescuer;       /* I: rescue worker; a kernel thread that handles
                                     the case where creating a worker fails under
                                     memory pressure */

    int nr_drainers;              /* WQ: drain in progress */
    int saved_max_active;         /* WQ: saved pwq max_active */

    struct workqueue_attrs *unbound_attrs;    /* WQ: only for unbound wqs */
    struct pool_workqueue *dfl_pwq;           /* WQ: only for unbound wqs */

#ifdef CONFIG_SYSFS
    struct wq_device *wq_dev;     /* I: for sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
    struct lockdep_map lockdep_map;
#endif
    char name[WQ_NAME_LEN];       /* I: workqueue name */

    /*
     * Destruction of workqueue_struct is sched-RCU protected to allow
     * walking the workqueues list without grabbing wq_pool_mutex.
     * This is used to dump all workqueues from sysrq.
     */
    struct rcu_head rcu;

    /* hot fields used during command issue, aligned to cacheline */
    unsigned int flags ____cacheline_aligned;    /* WQ: WQ_* flags */
    struct pool_workqueue __percpu *cpu_pwqs;    /* I: per-cpu pwqs (a pool_workqueue
                                                    is created per CPU) */
    struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node
                                                    (a pool_workqueue per NUMA node) */
};
struct worker {
    /* on idle list while idle, on busy hash table while busy */
    union {
        struct list_head entry;      /* L: while idle (on the worker_pool idle list) */
        struct hlist_node hentry;    /* L: while busy (in the worker_pool busy hash) */
    };

    struct work_struct *current_work;      /* L: work being processed */
    work_func_t current_func;              /* L: current_work's fn (handler being run) */
    struct pool_workqueue *current_pwq;    /* L: current_work's pwq (the pool_workqueue
                                              the current work belongs to) */
    bool desc_valid;                       /* ->desc is valid */
    struct list_head scheduled;            /* L: scheduled works (every work picked for
                                              execution is added to this list) */

    /* 64 bytes boundary on 64bit, 32 on 32bit */

    struct task_struct *task;     /* I: worker task (the kernel thread) */
    struct worker_pool *pool;     /* I: the associated pool */
                                  /* L: for rescuers */
    struct list_head node;        /* A: anchored at pool->workers */
                                  /* A: runs through worker->node */

    unsigned long last_active;    /* L: last active timestamp */
    unsigned int flags;           /* X: flags */
    int id;                       /* I: worker id */

    /*
     * Opaque string set with work_set_desc(). Printed out with task
     * dump for debugging - WARN, BUG, panic or sysrq.
     */
    char desc[WORKER_DESC_LEN];

    /* used only by rescuers to point to the target workqueue */
    struct workqueue_struct *rescue_wq;    /* I: the workqueue to rescue */
};
struct worker_pool {
    spinlock_t lock;       /* the pool lock */
    int cpu;               /* I: the associated cpu (CPU ID, for bound workqueues) */
    int node;              /* I: the associated node ID (memory node, for unbound workqueues) */
    int id;                /* I: pool ID */
    unsigned int flags;    /* X: flags */

    struct list_head worklist;    /* L: list of pending works (pending work is added here) */
    int nr_workers;               /* L: total number of workers */

    /* nr_idle includes the ones off idle_list for rebinding */
    int nr_idle;                  /* L: currently idle ones */

    struct list_head idle_list;      /* X: list of idle workers (idle workers are added here) */
    struct timer_list idle_timer;    /* L: worker idle timeout */
    struct timer_list mayday_timer;  /* L: SOS timer for workers */

    /* a worker is either on busy_hash or idle_list, or the manager */
    DECLARE_HASHTABLE(busy_hash, BUSY_WORKER_HASH_ORDER);    /* L: hash of busy workers */

    /* see manage_workers() for details on the two manager mutexes */
    struct mutex manager_arb;     /* manager arbitration */
    struct worker *manager;       /* L: purely informational */
    struct mutex attach_mutex;    /* attach/detach exclusion */
    struct list_head workers;     /* A: attached workers (all workers managed by this pool) */
    struct completion *detach_completion;    /* all workers detached */

    struct ida worker_ida;        /* worker IDs for task name */

    struct workqueue_attrs *attrs;    /* I: worker attributes */
    struct hlist_node hash_node;      /* PL: unbound_pool_hash node (links into unbound_pool_hash) */
    int refcnt;                       /* PL: refcnt for unbound pools */

    /*
     * The current concurrency level. As it's likely to be accessed
     * from other CPUs during try_to_wake_up(), put it in a separate
     * cacheline.
     */
    atomic_t nr_running ____cacheline_aligned_in_smp;

    /*
     * Destruction of pool is sched-RCU protected to allow dereferences
     * from get_work_pool().
     */
    struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
A worker_pool is a resource pool that manages multiple workers, i.e. multiple kernel threads.
For bound workqueues, worker_pools are created per CPU: each CPU has two worker_pools for different priorities, with nice values 0 and -20 respectively.
For unbound workqueues, the worker_pool is added to the unbound_pool_hash hash table after it is created.
A worker_pool manages an idle list and a busy list of workers; the busy list is organized as a hash table.
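Which of these pools serves a work item is decided when the workqueue is created. A hedged sketch of the creation flags (my_wq and my_init are illustrative names): WQ_HIGHPRI selects each CPU's nice -20 worker_pool instead of the default nice 0 one, and WQ_UNBOUND selects an unbound pool from the hash table mentioned above.

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;    /* illustrative name */

static int __init my_init(void)
{
    /* bound, high priority: served by the per-CPU nice -20 pools;
     * passing WQ_UNBOUND instead would use an unbound pool */
    my_wq = alloc_workqueue("my_wq", WQ_HIGHPRI, 0);
    if (!my_wq)
        return -ENOMEM;
    return 0;
}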
struct pool_workqueue {
    struct worker_pool *pool;       /* I: the associated pool */
    struct workqueue_struct *wq;    /* I: the owning workqueue */
    int work_color;                 /* L: current color */
    int flush_color;                /* L: flushing color */
    int refcnt;                     /* L: reference count */
    int nr_in_flight[WORK_NR_COLORS];    /* L: nr of in_flight works */
    int nr_active;                  /* L: nr of active works */
    int max_active;                 /* L: max active works */
    struct list_head delayed_works; /* L: delayed works (deferred work is added here) */
    struct list_head pwqs_node;     /* WR: node on wq->pwqs (links into the workqueue's pwq list) */
    struct list_head mayday_node;   /* MD: node on wq->maydays (links into the workqueue's mayday list) */

    /*
     * Release of unbound pwq is punted to system_wq. See put_pwq()
     * and pwq_unbound_release_workfn() for details. pool_workqueue
     * itself is also sched-RCU protected so that the first pwq can be
     * determined without grabbing wq->mutex.
     */
    struct work_struct unbound_release_work;
    struct rcu_head rcu;
} __aligned(1 << WORK_STRUCT_FLAG_BITS);
Summary
The top half is called the hard interrupt (hardirq); the bottom half comes in three forms: softirq, tasklet, and workqueue. The three bottom halves differ as follows:
- Softirqs and tasklets must not sleep; workqueues are implemented with kernel threads, so their handlers may sleep.
- Softirq types are defined at compile time and cannot be added or removed at runtime; tasklets can be added and removed at runtime.
- Handlers for the same softirq may run on multiple processors simultaneously, so they must be reentrant and use locks to protect critical sections; a given tasklet runs on only one processor at a time, so its handler is not required to be reentrant.