死锁介绍
任务的执行体之间互相持有对方所需的资源而不释放,形成了相互制约而都无法继续执行任务的情况,被称为“死锁”。
死锁案例
线程A持有锁a不释放,需要去获取锁b才能继续执行任务,
线程B持有锁b不释放,需要去获取锁c才能继续执行任务,
线程C持有锁c不释放,需要去获取锁d才能继续执行任务,
线程D持有锁d不释放,需要去获取锁a才能继续执行任务。
线程ABCD陷入了逻辑套死,形成了一个环,因此谁都无法继续执行任务。
如何判断形成死锁
判断形成死锁,只需要解决问题:即能判断线程和线程之后互相持有资源不释放,形成了一个环。
如何判断形成环
需要存储两种关系。1:锁和线程之间的关系。2、线程和线程之间的关系。
针对1,锁和线程之间的关系是多对一的,即,一个线程持有多个锁,但是一个锁最多只能能被一个线程持有。我们可以构建一个结构体数组存储这些关系,一个结构体存储着锁地址和线程id,一个结构体代表着一种关系。
针对2,线程和线程之间的关系,表示一个线程想要获取的资源,是否被另一个线程持有?如果被另一个线程持有,那么是哪个线程?需要用有向图记录,有向图有很多种记录方式,在编程中最常体现为邻接表。如图:
这个就是一个邻接表,线程tA想持有的资源在tB中,线程tB想持有的资源在tC中,以此类推,形成了一个环,也就是一个死锁。邻接表的结构是多个链表的头节点被用数组的方式串起来。
死锁检测组件编写逻辑
主要需要解决一下3块逻辑
线程获取锁
线程解锁
线程如果解锁,需要从锁-持有方数组中删除该关系。若邻接表中记录了其他线程在向该线程索取资源,那么删除也删除该关系。
监控环形成
需要另起一个线程负责监控邻接表,定期检查线程之间是否形成了死锁关系,如果形成了死锁关系,那么将这种关系输出到某处,提示程序员需要修改逻辑bug。
检测组件代码以及应用
代码比较长,了解了逻辑之后可以,可以直接在需要检测的函数的最前面调用init_hook()和start_check()这两个接口,以及提前配置好这两个接口所依赖的各层级函数、变量、定义等。
#define _GNU_SOURCE #include <dlfcn.h> #include <stdio.h> #include <stdlib.h> #include <pthread.h> #include <unistd.h> #include <stdint.h> #if 1 typedef unsigned long int uint64; #define MAX 100 enum Type {PROCESS, RESOURCE}; struct source_type { uint64 id; enum Type type; uint64 lock_id; int degress; }; struct vertex { struct source_type s; struct vertex *next; }; struct task_graph { struct vertex list[MAX]; //邻接表头节点数组 int num;//邻接表包含的链表的数量,也就是头节点数量 struct source_type locklist[MAX]; //锁-持有方数组 int lockidx; //锁-持有关系的数量 pthread_mutex_t mutex; }; struct task_graph *tg = NULL; int path[MAX+1]; int visited[MAX]; int k = 0; int deadlock = 0; type struct vertex *create_vertex(struct source_type type) { //其实就是把传入进来的type在堆空间复制了一份并且返回 struct vertex *tex = (struct vertex *)malloc(sizeof(struct vertex )); tex->s = type; tex->next = NULL; return tex; } int search_vertex(struct source_type type) { //查找所需的头节点在邻接表数组中的位置 int i = 0; for (i = 0;i < tg->num;i ++) { if (tg->list[i].s.type == type.type && tg->list[i].s.id == type.id) { return i; } } return -1; } void add_vertex(struct source_type type) { //增加邻接表数组的元素,也就是又来了一个链表要记录了 if (search_vertex(type) == -1) { tg->list[tg->num].s = type; tg->list[tg->num].next = NULL; tg->num ++; } } int add_edge(struct source_type from, struct source_type to) { //给邻接表中某只链表增加节点,表示头节点对应的线程所需要的资源又被持有了 add_vertex(from); add_vertex(to); struct vertex *v = &(tg->list[search_vertex(from)]); while (v->next != NULL) { v = v->next; } v->next = create_vertex(to); } int verify_edge(struct source_type i, struct source_type j) { //验证头节点对应的线程是不是在重复持有某个锁 if (tg->num == 0) return 0; int idx = search_vertex(i); if (idx == -1) { return 0; } struct vertex *v = &(tg->list[idx]); while (v != NULL) { if (v->s.id == j.id) return 1; v = v->next; } return 0; } int remove_edge(struct source_type from, struct source_type to) { //移除邻接表的某个链表的节点,表示线程和线程之间的关系解除 int idxi = search_vertex(from); int idxj = search_vertex(to); if (idxi != -1 && idxj != -1) { struct vertex *v = &tg->list[idxi]; struct vertex *remove; while (v->next != NULL) { if (v->next->s.id == to.id) { remove = v->next; v->next = v->next->next; free(remove); break; } v = v->next; } } } void print_deadlock(void) { //如果形成了死锁,那么打印环 int i = 0; printf("cycle : "); for (i = 0;i < k-1;i ++) { printf("%ld --> ", tg->list[path[i]].s.id); } printf("%ld\n", tg->list[path[i]].s.id); } int DFS(int idx) { struct vertex *ver = &tg->list[idx]; if (visited[idx] == 1) { path[k++] = idx; print_deadlock(); deadlock = 1; return 0; } visited[idx] = 1; path[k++] = idx; while (ver->next != NULL) { DFS(search_vertex(ver->next->s)); k --; ver = ver->next; } return 1; } int search_for_cycle(int idx) { //查询是否形成了环 struct vertex *ver = &tg->list[idx]; visited[idx] = 1; k = 0; path[k++] = idx; while (ver->next != NULL) { int i = 0; for (i = 0;i < tg->num;i ++) { if (i == idx) continue; visited[i] = 0; } for (i = 1;i <= MAX;i ++) { path[i] = -1; } k = 1; DFS(search_vertex(ver->next->s)); ver = ver->next; } } #endif // #if 1 int search_lock(uint64 lock) { // int i = 0; for (i = 0;i < tg->lockidx;i ++) { if (tg->locklist[i].lock_id == lock) { return i; } } return -1; } int search_empty_lock(uint64 lock) { int i = 0; for (i = 0;i < tg->lockidx;i ++) { if (tg->locklist[i].lock_id == 0) { return i; } } return tg->lockidx; } void lock_before(uint64_t tid, uint64_t lockaddr) { /* 1. if (lockaddr) { tid --> lockaddr.tid; } */ int idx = 0; for (idx = 0;idx < tg->lockidx;idx ++) { if (tg->locklist[idx].lock_id == lockaddr) { // struct source_type from; from.id = tid; from.type = PROCESS; add_vertex(from); struct source_type to; to.id = tg->locklist[idx].id; to.type = PROCESS; add_vertex(to); tg->locklist[idx].degress ++; if (!verify_edge(from, to)) add_edge(from, to); } } } void lock_after(uint64_t tid, uint64_t lockaddr) { /* if (!lockaddr) { tid --> lockaddr; } else { lockaddr.tid = tid; tid -> lockaddr; } */ int idx = 0; if (-1 == (idx = search_lock(lockaddr))) {// int eidx = search_empty_lock(lockaddr); tg->locklist[eidx].id = tid; tg->locklist[eidx].lock_id = lockaddr; tg->lockidx ++; } else { struct source_type from; from.id = tid; from.type = PROCESS; add_vertex(from); struct source_type to; to.id = tg->locklist[idx].id; to.type = PROCESS; add_vertex(to); tg->locklist[idx].degress --; if (verify_edge(from, to)) remove_edge(from, to); tg->locklist[idx].id = tid; } } void unlock_after(uint64_t tid, uint64_t lockaddr) { // lockaddr.tid = 0; int idx = search_lock(lockaddr); if (tg->locklist[idx].degress == 0) { tg->locklist[idx].id = 0; tg->locklist[idx].lock_id = 0; } } void check_dead_lock(void) { int i = 0; deadlock = 0; for (i = 0;i < tg->num;i ++) { if (deadlock == 1) break; search_for_cycle(i); } if (deadlock == 0) { printf("no deadlock\n"); } } static void *thread_routine(void *args) { while (1) { sleep(5); check_dead_lock(); } } void start_check(void) { tg = (struct task_graph*)malloc(sizeof(struct task_graph)); tg->num = 0; tg->lockidx = 0; pthread_t tid; pthread_create(&tid, NULL, thread_routine, NULL); } // hook // define typedef int (*pthread_mutex_lock_t)(pthread_mutex_t *mutex); pthread_mutex_lock_t pthread_mutex_lock_f = NULL; typedef int (*pthread_mutex_unlock_t)(pthread_mutex_t *mutex); pthread_mutex_unlock_t pthread_mutex_unlock_f = NULL; // implement int pthread_mutex_lock(pthread_mutex_t *mutex) { pthread_t selfid = pthread_self(); lock_before((uint64_t)selfid, (uint64_t)mutex); pthread_mutex_lock_f(mutex); lock_after((uint64_t)selfid, (uint64_t)mutex); } int pthread_mutex_unlock(pthread_mutex_t *mutex) { pthread_mutex_unlock_f(mutex); pthread_t selfid = pthread_self(); unlock_after((uint64_t)selfid, (uint64_t)mutex); } // init void init_hook(void) { if (!pthread_mutex_lock_f) pthread_mutex_lock_f = dlsym(RTLD_NEXT, "pthread_mutex_lock"); if (!pthread_mutex_unlock_f) pthread_mutex_unlock_f = dlsym(RTLD_NEXT, "pthread_mutex_unlock"); } #endif // #if 1 //sample pthread_mutex_t r1 = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t r2 = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t r3 = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t r4 = PTHREAD_MUTEX_INITIALIZER; pthread_mutex_t r5 = PTHREAD_MUTEX_INITIALIZER; void *t1_cb(void *arg) { printf("t1: %ld\n", pthread_self()); pthread_mutex_lock(&r1); sleep(1); pthread_mutex_lock(&r2); pthread_mutex_unlock(&r2); pthread_mutex_unlock(&r1); } void *t2_cb(void *arg) { printf("t2: %ld\n", pthread_self()); pthread_mutex_lock(&r2); sleep(1); pthread_mutex_lock(&r3); pthread_mutex_unlock(&r3); pthread_mutex_unlock(&r2); } void *t3_cb(void *arg) { printf("t3: %ld\n", pthread_self()); pthread_mutex_lock(&r3); sleep(1); pthread_mutex_lock(&r4); pthread_mutex_unlock(&r4); pthread_mutex_unlock(&r3); } void *t4_cb(void *arg) { printf("t4: %ld\n", pthread_self()); pthread_mutex_lock(&r4); sleep(1); pthread_mutex_lock(&r5); pthread_mutex_unlock(&r5); pthread_mutex_unlock(&r4); } void *t5_cb(void *arg) { printf("t5: %ld\n", pthread_self()); pthread_mutex_lock(&r1); sleep(1); pthread_mutex_lock(&r5); pthread_mutex_unlock(&r5); pthread_mutex_unlock(&r1); } // deadlock // int main() { init_hook(); //重载lock与unlock函数,保留原有功能的基础上增加lock_before.lock_after,unlock_after操作 start_check(); //另起一个线程定期检查是否形成死锁 //形成了死锁 pthread_t t1, t2, t3, t4, t5; pthread_create(&t1, NULL, t1_cb, NULL); pthread_create(&t2, NULL, t2_cb, NULL); pthread_create(&t3, NULL, t3_cb, NULL); pthread_create(&t4, NULL, t4_cb, NULL); pthread_create(&t5, NULL, t5_cb, NULL); pthread_join(t1, NULL); pthread_join(t2, NULL); pthread_join(t3, NULL); pthread_join(t4, NULL); pthread_join(t5, NULL); printf("complete\n"); } #endif