Linux进程与线程的内核实现

简介: task_struct称为进程描述符结构,该结构定义在文件中。进程描述符中包含一个具体进程的所有信息进程描述符中包含的数据能完整地描述一个正在执行的程序:它打开的文件,进程的地址空间,挂起的信号,进程的状态等

进程描述符task_struct

进程,线程创建的本质

总结

进程描述符task_struct

进程描述符(struct task_struct)

task_struct称为进程描述符结构,该结构定义在文件中。进程描述符中包含一个具体进程的所有信息
进程描述符中包含的数据能完整地描述一个正在执行的程序:它打开的文件,进程的地址空间,挂起的信号,进程的状态等

struct task_struct {
   
   
    volatile long state;    /* -1 unrunnable, 0 runnable, >0 stopped */
    void *stack;
    atomic_t usage;
    unsigned int flags;    /* per process flags, defined below */
    unsigned int ptrace;

#ifdef CONFIG_SMP
    struct llist_node wake_entry;
    int on_cpu;
#endif
    int on_rq;

    int prio, static_prio, normal_prio;
    unsigned int rt_priority;
    const struct sched_class *sched_class;
    struct sched_entity se;
    struct sched_rt_entity rt;
#ifdef CONFIG_CGROUP_SCHED
    struct task_group *sched_task_group;
#endif

#ifdef CONFIG_PREEMPT_NOTIFIERS
    /* list of struct preempt_notifier: */
    struct hlist_head preempt_notifiers;
#endif

    /*
     * fpu_counter contains the number of consecutive context switches
     * that the FPU is used. If this is over a threshold, the lazy fpu
     * saving becomes unlazy to save the trap. This is an unsigned char
     * so that after 256 times the counter wraps and the behavior turns
     * lazy again; this to deal with bursty apps that only use FPU for
     * a short time
     */
    unsigned char fpu_counter;
#ifdef CONFIG_BLK_DEV_IO_TRACE
    unsigned int btrace_seq;
#endif

    unsigned int policy;
    cpumask_t cpus_allowed;

#ifdef CONFIG_PREEMPT_RCU
    int rcu_read_lock_nesting;
    char rcu_read_unlock_special;
    struct list_head rcu_node_entry;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TREE_PREEMPT_RCU
    struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
#ifdef CONFIG_RCU_BOOST
    struct rt_mutex *rcu_boost_mutex;
#endif /* #ifdef CONFIG_RCU_BOOST */

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
    struct sched_info sched_info;
#endif

    struct list_head tasks;
#ifdef CONFIG_SMP
    struct plist_node pushable_tasks;
#endif

    struct mm_struct *mm, *active_mm;
#ifdef CONFIG_COMPAT_BRK
    unsigned brk_randomized:1;
#endif
#if defined(SPLIT_RSS_COUNTING)
    struct task_rss_stat    rss_stat;
#endif
/* task state */
    int exit_state;
    int exit_code, exit_signal;
    int pdeath_signal;  /*  The signal sent when the parent dies  */
    unsigned int jobctl;    /* JOBCTL_*, siglock protected */
    /* ??? */
    unsigned int personality;
    unsigned did_exec:1;
    unsigned in_execve:1;    /* Tell the LSMs that the process is doing an
                 * execve */
    unsigned in_iowait:1;


    /* Revert to default priority/policy when forking */
    unsigned sched_reset_on_fork:1;
    unsigned sched_contributes_to_load:1;

#ifdef CONFIG_GENERIC_HARDIRQS
    /* IRQ handler threads */
    unsigned irq_thread:1;
#endif

    pid_t pid;
    pid_t tgid;

#ifdef CONFIG_CC_STACKPROTECTOR
    /* Canary value for the -fstack-protector gcc feature */
    unsigned long stack_canary;
#endif

    /* 
     * pointers to (original) parent process, youngest child, younger sibling,
     * older sibling, respectively.  (p->father can be replaced with 
     * p->real_parent->pid)
     */
    struct task_struct __rcu *real_parent; /* real parent process */
    struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
    /*
     * children/sibling forms the list of my natural children
     */
    struct list_head children;    /* list of my children */
    struct list_head sibling;    /* linkage in my parent's children list */
    struct task_struct *group_leader;    /* threadgroup leader */

    /*
     * ptraced is the list of tasks this task is using ptrace on.
     * This includes both natural children and PTRACE_ATTACH targets.
     * p->ptrace_entry is p's link on the p->parent->ptraced list.
     */
    struct list_head ptraced;
    struct list_head ptrace_entry;

    /* PID/PID hash table linkage. */
    struct pid_link pids[PIDTYPE_MAX];
    struct list_head thread_group;

    struct completion *vfork_done;        /* for vfork() */
    int __user *set_child_tid;        /* CLONE_CHILD_SETTID */
    int __user *clear_child_tid;        /* CLONE_CHILD_CLEARTID */

    cputime_t utime, stime, utimescaled, stimescaled;
    cputime_t gtime;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
    cputime_t prev_utime, prev_stime;
#endif
    unsigned long nvcsw, nivcsw; /* context switch counts */
    struct timespec start_time;         /* monotonic time */
    struct timespec real_start_time;    /* boot based time */
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
    unsigned long min_flt, maj_flt;

    struct task_cputime cputime_expires;
    struct list_head cpu_timers[3];

/* process credentials */
    const struct cred __rcu *real_cred; /* objective and real subjective task
                     * credentials (COW) */
    const struct cred __rcu *cred;    /* effective (overridable) subjective task
                     * credentials (COW) */
    struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */

    char comm[TASK_COMM_LEN]; /* executable name excluding path
                     - access with [gs]et_task_comm (which lock
                       it with task_lock())
                     - initialized normally by setup_new_exec */
/* file system info */
    int link_count, total_link_count;
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
    struct sysv_sem sysvsem;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
    unsigned long last_switch_count;
#endif
/* CPU-specific state of this task */
    struct thread_struct thread;
/* filesystem information */
    struct fs_struct *fs;
/* open file information */
    struct files_struct *files;
/* namespaces */
    struct nsproxy *nsproxy;
/* signal handlers */
    struct signal_struct *signal;
    struct sighand_struct *sighand;

    sigset_t blocked, real_blocked;
    sigset_t saved_sigmask;    /* restored if set_restore_sigmask() was used */
    struct sigpending pending;

    unsigned long sas_ss_sp;
    size_t sas_ss_size;
    int (*notifier)(void *priv);
    void *notifier_data;
    sigset_t *notifier_mask;
    struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
    uid_t loginuid;
    unsigned int sessionid;
#endif
    seccomp_t seccomp;

/* Thread group tracking */
       u32 parent_exec_id;
       u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
 * mempolicy */
    spinlock_t alloc_lock;

    /* Protection of the PI data structures: */
    raw_spinlock_t pi_lock;

#ifdef CONFIG_RT_MUTEXES
    /* PI waiters blocked on a rt_mutex held by this task */
    struct plist_head pi_waiters;
    /* Deadlock detection and priority inheritance handling */
    struct rt_mutex_waiter *pi_blocked_on;
#endif

#ifdef CONFIG_DEBUG_MUTEXES
    /* mutex deadlock detection */
    struct mutex_waiter *blocked_on;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
    unsigned int irq_events;
    unsigned long hardirq_enable_ip;
    unsigned long hardirq_disable_ip;
    unsigned int hardirq_enable_event;
    unsigned int hardirq_disable_event;
    int hardirqs_enabled;
    int hardirq_context;
    unsigned long softirq_disable_ip;
    unsigned long softirq_enable_ip;
    unsigned int softirq_disable_event;
    unsigned int softirq_enable_event;
    int softirqs_enabled;
    int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
# define MAX_LOCK_DEPTH 48UL
    u64 curr_chain_key;
    int lockdep_depth;
    unsigned int lockdep_recursion;
    struct held_lock held_locks[MAX_LOCK_DEPTH];
    gfp_t lockdep_reclaim_gfp;
#endif

/* journalling filesystem info */
    void *journal_info;

/* stacked block device info */
    struct bio_list *bio_list;

#ifdef CONFIG_BLOCK
/* stack plugging */
    struct blk_plug *plug;
#endif

/* VM state */
    struct reclaim_state *reclaim_state;

    struct backing_dev_info *backing_dev_info;

    struct io_context *io_context;

    unsigned long ptrace_message;
    siginfo_t *last_siginfo; /* For ptrace use.  */
    struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
    u64 acct_rss_mem1;    /* accumulated rss usage */
    u64 acct_vm_mem1;    /* accumulated virtual memory usage */
    cputime_t acct_timexpd;    /* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
    nodemask_t mems_allowed;    /* Protected by alloc_lock */
    seqcount_t mems_allowed_seq;    /* Seqence no to catch updates */
    int cpuset_mem_spread_rotor;
    int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
    /* Control Group info protected by css_set_lock */
    struct css_set __rcu *cgroups;
    /* cg_list protected by css_set_lock and tsk->alloc_lock */
    struct list_head cg_list;
#endif
#ifdef CONFIG_FUTEX
    struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
    struct compat_robust_list_head __user *compat_robust_list;
#endif
    struct list_head pi_state_list;
    struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTS
    struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
    struct mutex perf_event_mutex;
    struct list_head perf_event_list;
#endif
#ifdef CONFIG_NUMA
    struct mempolicy *mempolicy;    /* Protected by alloc_lock */
    short il_next;
    short pref_node_fork;
#endif
    struct rcu_head rcu;

    /*
     * cache last used pipe for splice
     */
    struct pipe_inode_info *splice_pipe;
#ifdef    CONFIG_TASK_DELAY_ACCT
    struct task_delay_info *delays;
#endif
#ifdef CONFIG_FAULT_INJECTION
    int make_it_fail;
#endif
    /*
     * when (nr_dirtied >= nr_dirtied_pause), it's time to call
     * balance_dirty_pages() for some dirty throttling pause
     */
    int nr_dirtied;
    int nr_dirtied_pause;
    unsigned long dirty_paused_when; /* start of a write-and-pause period */

#ifdef CONFIG_LATENCYTOP
    int latency_record_count;
    struct latency_record latency_record[LT_SAVECOUNT];
#endif
    /*
     * time slack values; these are used to round up poll() and
     * select() etc timeout values. These are in nanoseconds.
     */
    unsigned long timer_slack_ns;
    unsigned long default_timer_slack_ns;

    struct list_head    *scm_work_list;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
    /* Index of current stored address in ret_stack */
    int curr_ret_stack;
    /* Stack of return addresses for return function tracing */
    struct ftrace_ret_stack    *ret_stack;
    /* time stamp for last schedule */
    unsigned long long ftrace_timestamp;
    /*
     * Number of functions that haven't been traced
     * because of depth overrun.
     */
    atomic_t trace_overrun;
    /* Pause for the tracing */
    atomic_t tracing_graph_pause;
#endif
#ifdef CONFIG_TRACING
    /* state flags for use by tracers */
    unsigned long trace;
    /* bitmask and counter of trace recursion */
    unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
    struct memcg_batch_info {
   
   
        int do_batch;    /* incremented when batch uncharge started */
        struct mem_cgroup *memcg; /* target memcg of uncharge */
        unsigned long nr_pages;    /* uncharged usage */
        unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
    } memcg_batch;
#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
    atomic_t ptrace_bp_refcnt;
#endif
};

pid与tgid

image-20231022134040327

tgid全名thread group ID,一个内部有多线程的进程,进程中每个线程的id都不一样,但是对外表现出同一个进程整体

struct task_struct{
   
   
    pid_t pid;//进程的唯一标识
     pid_t tgid;// 线程组的领头线程的pid成员的值
};

进程id编号分配规则

Linux 内核限制进程号需小于等于 32767。新进程创建时,内核会按顺序将下一个可用的进程号分配给其使用。每当进程号达到 32767 的限制时,内核将重置进程号计数器,以便从小整数开始分配。

一旦进程号达到 32767,会将进程号计数器重置为 300,而不是 1。之所以如此,是因为低数值的进程号为系统进程和守护进程所长期占用,在此范围内搜索尚未使用的进程号只会是浪费时间。

内存管理mm_struct

struct task_struct{
   
   
    struct mm_struct* mm;
}

每个进程都有自己独立的虚拟地址空间,使用mm_struct结构体来管理内存,这里的mm指针指向了mm_struct结构体,包含了内存资源的页表,内存映射等

 struct mm_struct{
   
   
     struct vm_area_struct* mmap;
     struct re_root mm_rb;
     //...
     pgd_t* pgd;  
 }

进程与文件,文件系统

task_struct与文件相关的字段最常用的下面这两个

struct task_struct{
   
   
    //文件系统的信息的指针,包含了进程运行的目录信息
    struct fs_struct* fs;

    //打开的文件描述符资源表
    struct files_struct* files;
}

进程,线程创建的本质

fork()和pthread_create()函数最后都会进入clone()系统调用

image-20231022135531221.png

clone函数原型

  • fn:表示clone生成的子进程的起始调用函数,参数由第四个参数arg指定
  • stack:表示生成的子进程的栈空间
  • flags:关键参数,用于区分生成的子进程与父进程如何共享资源(内存,打开文件描述符等)
  • 剩下的参数与线程实现有关
int clone(int (*fn)(void *), 
          void *stack, 
          int flags, 
          void *arg, ...
                 /* pid_t *parent_tid, void *tls, pid_t *child_tid */ );

线程创建的实现pthread_create()

#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <pthread.h>

void* run(void* arg){
   
   

}

int main()
{
   
   
    pthread_t t1;
    pthread_create(&t1, 0, &run, 0);
    pthread_join(t1, 0);
    return 0;
}

此时clone系统调用的flags=CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | ...

标志 含义
CLONE_VM 共享虚拟内存
CLONE_FS 共享文件与系统相关的属性
CLONE_FILES 共享打开的文件描述符
CLONE_SIGHAND 共享对信号的处置
CLONE_THREAD 置于父进程所属的线程组中

进程创建的实现fork()

#include <sys/wait.h>
#include <unistd.h>


int main()
{
   
   
    pid_t pid;
    pid = fork();
    if(pid == 0){
   
   
        //此处是子进程的代码分支
    }else if(pid > 0){
   
   
        //此处是父进程的代码分支
    }
    return 0;
}

此时clone系统调用的flags=CLONE_SIGCHLD | ...

本质:不共享资源,使用cow,任何一个修改都会造成分裂

标志 含义
CLONE_SIGCHLD 接收子进程退出的信号

总结

  • fork()和pthread_create()创建进程或者线程都会调用clone()系统调用
  • pthread_create()调用clone()时传入的flags参数会设置共享虚拟内存,共享文件与系统相关的属性,共享打开的文件描述符,共享对信号的处置,置于父进程所属的线程组中
  • fork()调用clone()时传入的flags参数只会设置接收子进程退出的信号
  • 在内核态中没有进程和线程的概念,内核不会区分进程和线程的操作
目录
相关文章
|
1天前
|
缓存 监控 Linux
linux进程管理万字详解!!!
本文档介绍了Linux系统中进程管理、系统负载监控、内存监控和磁盘监控的基本概念和常用命令。主要内容包括: 1. **进程管理**: - **进程介绍**:程序与进程的关系、进程的生命周期、查看进程号和父进程号的方法。 - **进程监控命令**:`ps`、`pstree`、`pidof`、`top`、`htop`、`lsof`等命令的使用方法和案例。 - **进程管理命令**:控制信号、`kill`、`pkill`、`killall`、前台和后台运行、`screen`、`nohup`等命令的使用方法和案例。
19 4
linux进程管理万字详解!!!
|
2天前
|
缓存 负载均衡 算法
Linux内核中的进程调度算法解析####
本文深入探讨了Linux操作系统核心组件之一——进程调度器,着重分析了其采用的CFS(完全公平调度器)算法。不同于传统摘要对研究背景、方法、结果和结论的概述,本文摘要将直接揭示CFS算法的核心优势及其在现代多核处理器环境下如何实现高效、公平的资源分配,同时简要提及该算法如何优化系统响应时间和吞吐量,为读者快速构建对Linux进程调度机制的认知框架。 ####
|
2天前
|
调度 Python
深入浅出操作系统:进程与线程的奥秘
【10月更文挑战第28天】在数字世界的幕后,操作系统悄无声息地扮演着关键角色。本文将拨开迷雾,深入探讨操作系统中的两个基本概念——进程和线程。我们将通过生动的比喻和直观的解释,揭示它们之间的差异与联系,并展示如何在实际应用中灵活运用这些知识。准备好了吗?让我们开始这段揭秘之旅!
|
3天前
|
消息中间件 存储 Linux
|
4天前
|
缓存 Linux
揭秘Linux内核:探索CPU拓扑结构
【10月更文挑战第26天】
19 1
|
4天前
|
缓存 运维 Linux
深入探索Linux内核:CPU拓扑结构探测
【10月更文挑战第18天】在现代计算机系统中,CPU的拓扑结构对性能优化和资源管理至关重要。了解CPU的核心、线程、NUMA节点等信息,可以帮助开发者和系统管理员更好地调优应用程序和系统配置。本文将深入探讨如何在Linux内核中探测CPU拓扑结构,介绍相关工具和方法。
8 0
|
6天前
|
算法 调度
探索操作系统的心脏:内核与进程管理
【10月更文挑战第25天】在数字世界的复杂迷宫中,操作系统扮演着关键角色,如同人体中的心脏,维持着整个系统的生命力。本文将深入浅出地剖析操作系统的核心组件——内核,以及它如何通过进程管理来协调资源的分配和使用。我们将从内核的概念出发,探讨它在操作系统中的地位和作用,进而深入了解进程管理的机制,包括进程调度、状态转换和同步。此外,文章还将展示一些简单的代码示例,帮助读者更好地理解这些抽象概念。让我们一起跟随这篇文章,揭开操作系统神秘的面纱,理解它如何支撑起我们日常的数字生活。
|
9天前
|
运维 Linux
Linux查找占用的端口,并杀死进程的简单方法
通过上述步骤和命令,您能够迅速识别并根据实际情况管理Linux系统中占用特定端口的进程。为了获得更全面的服务器管理技巧和解决方案,提供了丰富的资源和专业服务,是您提升运维技能的理想选择。
10 1
|
10天前
|
缓存 算法 安全
深入理解Linux操作系统的心脏:内核与系统调用####
【10月更文挑战第20天】 本文将带你探索Linux操作系统的核心——其强大的内核和高效的系统调用机制。通过深入浅出的解释,我们将揭示这些技术是如何协同工作以支撑起整个系统的运行,同时也会触及一些常见的误解和背后的哲学思想。无论你是开发者、系统管理员还是普通用户,了解这些基础知识都将有助于你更好地利用Linux的强大功能。 ####
21 1
|
11天前
|
缓存 编解码 监控
深入探索Linux内核调度机制的奥秘###
【10月更文挑战第19天】 本文旨在以通俗易懂的语言,深入浅出地剖析Linux操作系统内核中的进程调度机制,揭示其背后的设计哲学与实现策略。我们将从基础概念入手,逐步揭开Linux调度策略的神秘面纱,探讨其如何高效、公平地管理系统资源,以及这些机制对系统性能和用户体验的影响。通过本文,您将获得关于Linux调度机制的全新视角,理解其在日常计算中扮演的关键角色。 ###
34 1