什么是进程
操作系统作为硬件的使用层,提供使用硬件资源的能力,进程作为操作系统使用层,提供使用操作系统抽象出的资源层的能力。
进程:是指计算机中己运行的程序。进程本身不是基本的运行单位,而是线程的容器。程序本身只是指令、数据及其组织形式的描述,进程才是程序(那些指令和数据)的真正运行实例。
进程的四要要素
- 有一段程序代其执行,
- 有进程专用的系统堆栈空间;
- 在内核有taskstruct数据结构,
- 进程有独立的存储空间,拥有专有的用户空间;
如果具备前面三条的话而缺少第四条,那就称为"线程"。
如果完全没有用户空间,就称为"内核线程";如果共享用户空间则就称为"用户线程"。
进程生命周期
Linux操作系统属于多任务操作系统,系统中的每个进程能够分时复用CPU时间片,通过有效的进程调度策略实现多任务并行执行。而进程在被CPU调度运行,等待CPU资源分配以及等待外部事件时会属于不同的状态。进程之间的状态关系:
运行:该进程此刻正在执行。
等待:进程能够运行,但没有得到许可,因为CPU分配给另一个进程。调度器可以在下次任务切换时结束该进程。
睡眠:进程正在睡眠无法运行,因为它在等待一个外部事件。调度器无法在下一次任务切换时选择该进程。
task_struct数据结构
Linux内核涉及进程和程序的所有算法都围绕一个名为task_struct的数据结构建立,该结构定义在include/linux/sched.h中。这是系统中主要的,个结构。在阐述调度器的实现之前,了解一下Linux管理进程的方式是很有必要的。
task_struct包含很多成员,将进程与各个内核子系统联系,task_struct定义如下:
struct task_struct {//进程描述符 volatile long state; /*进程状态一1:就绪态0:运仃态>0:停止态 -1 unrunnable, 0 runnable, >0 stopped */ void *stack;//指向内核栈指针 atomic_t usage;//有几个进程在使用此结构 unsigned int flags; /*标记 per process flags, defined below */ unsigned int ptrace;//ptrace系统调用,实现断电测试,跟踪进程运行 #ifdef CONFIG_SMP struct llist_node wake_entry; int on_cpu; struct task_struct *last_wakee; unsigned long wakee_flips; unsigned long wakee_flip_decay_ts; int wake_cpu; #endif int on_rq;//运行队列和进程调试相关程序 int prio, static_prio, normal_prio;//关于进程调度 unsigned int rt_priority;//优先级 //关于进程 const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif struct sched_dl_entity dl; #ifdef CONFIG_PREEMPT_NOTIFIERS /* list of struct preempt_notifier: */ struct hlist_head preempt_notifiers; #endif //块设备跟踪工具 #ifdef CONFIG_BLK_DEV_IO_TRACE unsigned int btrace_seq; #endif //进程调度策略相关字段 unsigned int policy; int nr_cpus_allowed; cpumask_t cpus_allowed; //RCU同步原语 #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; union rcu_special rcu_read_unlock_special; struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TASKS_RCU unsigned long rcu_tasks_nvcsw; bool rcu_tasks_holdout; struct list_head rcu_tasks_holdout_list; int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; #endif //进程架构链表 struct list_head tasks; #ifdef CONFIG_SMP struct plist_node pushable_tasks; struct rb_node pushable_dl_tasks; #endif /*进程管理进程的地址空间,每个进程有独立的地址空间4G,32位X86*/ struct mm_struct *mm, *active_mm; #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif /* per-thread vma caching */ u32 vmacache_seqnum; struct vm_area_struct *vmacache[VMACACHE_SIZE]; #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif //进程状态参数 /* task state */ int exit_state; int exit_code, exit_signal; int pdeath_signal; /* 接收父进程终止时就会发出信号 The signal sent when the parent dies */ unsigned int jobctl; /* JOBCTL_*, siglock protected */ /* Used for emulating ABI behavior of previous Linux versions */ unsigned int personality; unsigned in_execve:1; /* Tell the LSMs that the process is doing an * execve */ unsigned in_iowait:1; /* Revert to default priority/policy when forking */ unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; #ifdef CONFIG_MEMCG_KMEM unsigned memcg_kmem_skip_account:1; #endif unsigned long atomic_flags; /* Flags needing atomic access. */ struct restart_block restart_block; pid_t pid;//进程pid pid_t tgid;//父进程 #ifdef CONFIG_CC_STACKPROTECTOR /* Canary value for the -fstack-protector gcc feature */ unsigned long stack_canary;//防止内核堆栈溢出 #endif /* * pointers to (original) parent process, youngest child, younger sibling, * older sibling, respectively. (p->father can be replaced with * p->real_parent->pid) */ struct task_struct __rcu *real_parent; /*初始化父进程 real parent process */ struct task_struct __rcu *parent; /* 接收终止进程recipient of SIGCHLD, wait4() reports */ /* * children/sibling forms the list of my natural children */ struct list_head children; //维护子进程链表 /* list of my children */ struct list_head sibling; /*兄弟进程链表 linkage in my parent's children list */ struct task_struct *group_leader; /*线程组组长 threadgroup leader */ /* * ptraced is the list of tasks this task is using ptrace on. * This includes both natural children and PTRACE_ATTACH targets. * p->ptrace_entry is p's link on the p->parent->ptraced list. */ struct list_head ptraced;//系统调用,关于断开调试 struct list_head ptrace_entry; /* PID/PID hash table linkage. */ struct pid_link pids[PIDTYPE_MAX];//pid/pid散列表的关系 struct list_head thread_group; struct list_head thread_node; //for_fork()函数 struct completion *vfork_done; /* for vfork() */ int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ //描还cpu的时间内容 //utime用户态下的执行时间 //stime内核态下的执行时间 cputime_t utime, stime, utimescaled, stimescaled; cputime_t gtime; #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE struct cputime prev_cputime; #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqlock_t vtime_seqlock; unsigned long long vtime_snap; enum { VTIME_SLEEPING = 0, VTIME_USER, VTIME_SYS, } vtime_snap_whence; #endif unsigned long nvcsw, nivcsw; /* context switch counts */ u64 start_time; /* monotonic time in nsec */ u64 real_start_time; /* boot based time in nsec */ /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; struct task_cputime cputime_expires; struct list_head cpu_timers[3]; /* process credentials */ const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) - initialized normally by setup_new_exec */ /* file system info */ int link_count, total_link_count; #ifdef CONFIG_SYSVIPC /* ipc stuff */ struct sysv_sem sysvsem; struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK /* hung task detection */ unsigned long last_switch_count; #endif /* CPU-specific state of this task */ struct thread_struct thread; /* filesystem information */ struct fs_struct *fs; /* open file information */ struct files_struct *files; /* namespaces */ struct nsproxy *nsproxy; /* signal handlers */ struct signal_struct *signal; struct sighand_struct *sighand; sigset_t blocked, real_blocked; sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ struct sigpending pending; unsigned long sas_ss_sp; size_t sas_ss_size; int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; struct callback_head *task_works; struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL kuid_t loginuid; unsigned int sessionid; #endif struct seccomp seccomp; /* Thread group tracking */ u32 parent_exec_id; u32 self_exec_id; /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, * mempolicy */ spinlock_t alloc_lock; /* Protection of the PI data structures: */ raw_spinlock_t pi_lock; #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task */ struct rb_root pi_waiters; struct rb_node *pi_waiters_leftmost; /* Deadlock detection and priority inheritance handling */ struct rt_mutex_waiter *pi_blocked_on; #endif #ifdef CONFIG_DEBUG_MUTEXES /* mutex deadlock detection */ struct mutex_waiter *blocked_on; #endif #ifdef CONFIG_TRACE_IRQFLAGS unsigned int irq_events; unsigned long hardirq_enable_ip; unsigned long hardirq_disable_ip; unsigned int hardirq_enable_event; unsigned int hardirq_disable_event; int hardirqs_enabled; int hardirq_context; unsigned long softirq_disable_ip; unsigned long softirq_enable_ip; unsigned int softirq_disable_event; unsigned int softirq_enable_event; int softirqs_enabled; int softirq_context; #endif #ifdef CONFIG_LOCKDEP # define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; unsigned int lockdep_recursion; struct held_lock held_locks[MAX_LOCK_DEPTH]; gfp_t lockdep_reclaim_gfp; #endif /* journalling filesystem info */ void *journal_info;//日志文件系统信息 /* stacked block device info */ struct bio_list *bio_list;//块设备i岸标 #ifdef CONFIG_BLOCK /* stack plugging */ struct blk_plug *plug; #endif /* VM state *///虚拟内存状态参数 struct reclaim_state *reclaim_state;//虚拟内存状态,内存回收 struct backing_dev_info *backing_dev_info;//存放款设备I/O流量信息 struct io_context *io_context;//I/O调度器所用的信息 unsigned long ptrace_message; siginfo_t *last_siginfo; /* For ptrace use. */ struct task_io_accounting ioac;//记录进程I/O计数 #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ cputime_t acct_timexpd; /* stime + utime since last update */ #endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; /* Protected by alloc_lock */ seqcount_t mems_allowed_seq; /* Seqence no to catch updates */ int cpuset_mem_spread_rotor; int cpuset_slab_spread_rotor; #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock */ struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif //futex同步机制 #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif //内存检测工具Performance Event #ifdef CONFIG_PERF_EVENTS struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif #ifdef CONFIG_DEBUG_PREEMPT unsigned long preempt_disable_ip; #endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; /* Protected by alloc_lock */ short il_next; short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING int numa_scan_seq; unsigned int numa_scan_period; unsigned int numa_scan_period_max; int numa_preferred_nid; unsigned long numa_migrate_retry; u64 node_stamp; /* migration stamp */ u64 last_task_numa_placement; u64 last_sum_exec_runtime; struct callback_head numa_work; struct list_head numa_entry; struct numa_group *numa_group; /* * numa_faults is an array split into four regions: * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer * in this precise order. * * faults_memory: Exponential decaying average of faults on a per-node * basis. Scheduling placement decisions are made based on these * counts. The values remain static for the duration of a PTE scan. * faults_cpu: Track the nodes the process was running on when a NUMA * hinting fault was incurred. * faults_memory_buffer and faults_cpu_buffer: Record faults per node * during the current scan window. When the scan completes, the counts * in faults_memory and faults_cpu decay and these values are copied. */ unsigned long *numa_faults; unsigned long total_numa_faults; /* * numa_faults_locality tracks if faults recorded during the last * scan window were remote/local or failed to migrate. The task scan * period is adapted based on the locality of the faults with different * weights depending on whether they were shared or private faults */ unsigned long numa_faults_locality[3]; unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ struct rcu_head rcu;//rcu链表 /* * cache last used pipe for splice */ struct pipe_inode_info *splice_pipe;//管道 struct page_frag task_frag; //延迟计数 #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif #ifdef CONFIG_FAULT_INJECTION int make_it_fail; #endif /* * when (nr_dirtied >= nr_dirtied_pause), it's time to call * balance_dirty_pages() for some dirty throttling pause */ int nr_dirtied; int nr_dirtied_pause; unsigned long dirty_paused_when; /* start of a write-and-pause period */ #ifdef CONFIG_LATENCYTOP int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; #endif /* * time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. */ unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; #ifdef CONFIG_KASAN unsigned int kasan_depth; #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack */ int curr_ret_stack; /* Stack of return addresses for return function tracing */ struct ftrace_ret_stack *ret_stack; /* time stamp for last schedule */ unsigned long long ftrace_timestamp; /* * Number of functions that haven't been traced * because of depth overrun. */ atomic_t trace_overrun; /* Pause for the tracing */ atomic_t tracing_graph_pause; #endif #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_MEMCG struct memcg_oom_info { struct mem_cgroup *memcg; gfp_t gfp_mask; int order; unsigned int may_oom:1; } memcg_oom; #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; #endif #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) unsigned int sequential_io; unsigned int sequential_io_avg; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; #endif };