死锁及死锁检测

240 阅读 0 评论 159 点赞

我是靠谱客的博主安详皮卡丘，这篇文章主要介绍死锁及死锁检测，现在分享给大家，希望可以做个参考。

一、什么是死锁

线程a占有资源1，线程b占有资源2，现在线程a想要访问资源2，线程b想要访问资源1；
这样两个线程都访问不到自己想要的资源，并且互相僵持在这，我们将这种现象称之为死锁。
在这里插入图片描述
这只是两个线程的例子，如果是多个线程，死锁是什么样子呢？

多个线程之间依次想要访问其他线程的资源，这样相互僵持形成的一个访问闭环。

二、死锁产生的条件

1.条件互斥：进程/线程要求对所分配的资源进行排它性控制，即在一段时间内某资源仅为一进程/线程所占用。
2.请求和保持：当进程/线程因请求资源而阻塞时，对已获得的资源保持不放。
3.不剥夺：进程/线程已获得的资源在未使用完之前，不能剥夺，只能在使用完时由自己释放。
4.环路等待：在发生死锁时，必然存在一个进程/线程——资源的环形链。

三、如何检测死锁

资源获取环可以采用图来存储，使用有向图来存储。
线程 A 获取线程 B 已占用的锁，则为线程 A 指向线程 B。
运行过程中线程 B 获取成功的锁即为线程 B 已占用的锁（可以使用hook方法得到）。
检测的原理：采用另一个线程定时对图进行检测，判断是否有环存在。

四、死锁检测组件的实现

1、先构建出有向图的数据结构，然后hook住标准的加解锁api（pthread_mutex_lock）；

hook类似于c++语法中的重写

2、在hook方法中把线程与锁的关系构建成一个有向图（线程为图的顶点，线程与线程之间因为锁的关系确定为边），最后我们需要在程序的运行期间时刻监控线程与锁之间的关系，通过线程在加锁前、加锁后以及释放锁之后的3个阶段来维护有向图的正确性（通过有向图的状态我们就可以判断是否有死锁）。
（1）加锁之前：当前线程需要加的锁是否被其他线程占用，如果是，就让当前线程指向占有锁的线程（构成一条边）。

举例：线程A需要对线程B已经lock的锁lock的话，需要在线程A到线程B之间加一个边，线程A指向线程B。

（2）加锁之后：需要将锁和线程建立起一对一的关系（说明该锁目前被哪个线程使用）,存在2种情况：

a.该锁之前没有被其他线程lock过，直接建立起线程id和锁id的关系-这种情况很明了，就是使用一个结构体变量来表示对应的线程id和锁id。

b.该锁之前被其他线程lock过，但是后来被那个线程unlock了，这时候需要判断当前线程和那个线程之间是否存在边，如果存在，需要先删除边，然后再将锁id和当前线程建立起一对一关系。

举例：对于b来说，按照步骤（1）的例子来说，如果线程A与线程B之间有线程A指向线程B的边，并且B在lock锁之后又unlock了该锁，这时候A就能够对该锁lock了，但是lock之前需要将A到B的边进行删除，因为该锁已经从B转移到了A。

（3）释放锁之后：查询锁id的下标，然后将其锁id和线程id设置为0（清除步骤二建立的对应关系）。

3、对每一个节点都进行深度遍历，判断路径中是否存在闭环现象，若存在则就有死锁。

五、c实现代码

#define _GNU_SOURCE	//此宏可以开启dlfcn库里的一些开关
#include <dlfcn.h>

#include <stdio.h>
#include <pthread.h>
#include <unistd.h>

#include <stdlib.h>
#include <stdint.h>

#include <unistd.h>

#define THREAD_NUM      10

// Integer type used for thread ids and for mutex addresses stored as lock ids.
typedef unsigned long int uint64;

// Signature of pthread_mutex_lock(); used to call the real function from the hook.
typedef int (*pthread_mutex_lock_t)(pthread_mutex_t *mutex);

// The real pthread_mutex_lock(); assigned by init_hook() via dlsym(RTLD_NEXT, ...).
pthread_mutex_lock_t pthread_mutex_lock_f;

// Signature of pthread_mutex_unlock(); used to call the real function from the hook.
typedef int (*pthread_mutex_unlock_t)(pthread_mutex_t *mutex);

// The real pthread_mutex_unlock(); assigned by init_hook() via dlsym(RTLD_NEXT, ...).
pthread_mutex_unlock_t pthread_mutex_unlock_f;


#if 1 // 图的结构

// Capacity of the vertex table and of the lock list; also sizes the DFS path/visited arrays.
#define MAX		100

enum Type {PROCESS, RESOURCE}; // kind of entity a graph node stands for: a process/thread or a resource (lock); only PROCESS is used in the visible code

struct source_type 	// payload of a graph vertex; also used as a record in the lock list
{
	uint64 id;		// thread id
	enum Type type;	// kind of node (PROCESS or RESOURCE)

	uint64 lock_id;	// lock id; the hooks store the mutex address here
	int degress;	// counter: incremented by lock_before() when a thread requests this lock while it is
					// recorded, decremented by lock_after() when a waiter takes the lock over;
					// unlock_after() clears the record only when it is 0
};

struct vertex // adjacency-list node: an element of task_graph.list is the head for one vertex, the nodes chained behind it are the targets of its outgoing edges
{
	struct source_type s;
	struct vertex *next;	// next edge target in the chain, NULL at the end

};

struct task_graph // wait-for graph of threads plus the table of tracked locks
{
	struct vertex list[MAX];	// adjacency-list heads, one per vertex
	int num;					// number of vertices in use

	struct source_type locklist[MAX];	// one record per tracked lock: owning thread id and lock id
	int lockidx;	// upper bound of the lock-list slots scanned by the search helpers

	pthread_mutex_t mutex;	// NOTE(review): never initialized or locked in the visible code
};

struct task_graph *tg = NULL;	// the global graph; allocated by start_check()
int path[MAX+1];	// vertex indexes along the DFS path currently being explored
int visited[MAX];	// per-vertex visit marks used by DFS() and search_for_cycle()
int k = 0;	// number of valid entries in path
int deadlock = 0;	// set to 1 by DFS() when a cycle is found; reset by check_dead_lock()

// Allocate a detached adjacency-list node that holds a copy of `type`.
//
// Returns the new node (next == NULL), or NULL when the allocation fails.
// The caller owns the node and releases it with free().
struct vertex *create_vertex(struct source_type type)
{
	struct vertex *tex = malloc(sizeof *tex);

	if (tex == NULL)
		return NULL;

	tex->s = type;
	tex->next = NULL;

	return tex;
}


int search_vertex(struct source_type type) 	//查找顶点
{
	int i = 0;

	for (i = 0;i < tg->num;i ++) 
	{
		if (tg->list[i].s.type == type.type && tg->list[i].s.id == type.id) 
			return i;

	}

	return -1;
}

void add_vertex(struct source_type type) 	//增加顶点
{
	if (search_vertex(type) == -1) 
	{
		tg->list[tg->num].s = type;
		tg->list[tg->num].next = NULL;
		tg->num ++;

	}

}


// Append a directed edge from -> to, registering both end points as vertices
// first. No duplicate check is done here; callers use verify_edge() for that.
//
// Returns 0 when the edge was appended, -1 when an end point could not be
// found in the vertex table or the edge node could not be allocated.
int add_edge(struct source_type from, struct source_type to)
{
	add_vertex(from);
	add_vertex(to);

	int idx = search_vertex(from);
	if (idx == -1 || search_vertex(to) == -1)
		return -1;

	struct vertex *node = create_vertex(to);
	if (node == NULL)
		return -1;

	// Walk to the tail of the adjacency list and hook the new node in.
	struct vertex *v = &(tg->list[idx]);
	while (v->next != NULL)
	{
		v = v->next;
	}
	v->next = node;

	return 0;
}


// Report whether a directed edge i -> j exists.
//
// Edge targets are matched by id only, the same way remove_edge() matches them.
// Returns 1 when the edge exists, 0 otherwise (including when i is not a vertex).
int verify_edge(struct source_type i, struct source_type j)
{
	int idx = search_vertex(i);
	if (idx == -1)
		return 0;

	// list[idx] is the vertex itself, not an edge: the outgoing edges start at
	// ->next. Starting at the head would report a self-edge i -> i that was
	// never added.
	struct vertex *v = tg->list[idx].next;

	while (v != NULL)
	{
		if (v->s.id == j.id)
			return 1;

		v = v->next;
	}

	return 0;
}


// Remove the first edge from -> to (targets are matched by id) and free its node.
//
// Returns 0 when an edge was removed, -1 when either end point is not a vertex
// or no such edge exists.
int remove_edge(struct source_type from, struct source_type to)
{
	int idxi = search_vertex(from);
	int idxj = search_vertex(to);

	if (idxi == -1 || idxj == -1)
		return -1;

	struct vertex *v = &tg->list[idxi];

	while (v->next != NULL)
	{
		if (v->next->s.id == to.id)
		{
			struct vertex *victim = v->next;

			v->next = victim->next;	// unlink before freeing
			free(victim);
			return 0;
		}

		v = v->next;
	}

	return -1;
}


// Print the thread ids of the vertices recorded in path[0..k-1] as
// "deadlock : a --> b --> ... --> z" followed by a newline.
// Prints nothing when the path is empty.
void print_deadlock(void)
{
	int i = 0;

	if (k <= 0)
		return;

	printf("deadlock : ");
	for (i = 0;i < k-1;i ++)
	{
		printf("%lu --> ", tg->list[path[i]].s.id);
	}

	// The id is an unsigned long, hence %lu; the line ends with a real newline.
	printf("%lu\n", tg->list[path[i]].s.id);
}

// Depth-first walk from vertex `idx`, reporting a cycle when the walk reaches
// a vertex that is still on the current path.
//
// visited[] marks: 0 = not seen, 1 = on the current path, 2 = fully explored.
// search_for_cycle() resets the marks to 0 (and the start vertex to 1) before
// each walk. Only a vertex marked 1 closes a cycle; a vertex marked 2 was
// reached earlier through another branch and is simply skipped, so shared
// descendants are not mistaken for a deadlock.
//
// path[]/k hold the current path for print_deadlock(); every call leaves k as
// it found it. Sets the global `deadlock` to 1 when a cycle is found.
//
// Returns 0 when `idx` itself closes a cycle, 1 otherwise.
int DFS(int idx)
{
	struct vertex *ver;

	// An edge target missing from the vertex table (search_vertex() == -1)
	// cannot be followed.
	if (idx < 0 || idx >= tg->num)
		return 1;

	if (visited[idx] == 1)
	{
		path[k++] = idx;
		print_deadlock();
		deadlock = 1;
		k --;

		return 0;
	}

	if (visited[idx] == 2)
		return 1;

	visited[idx] = 1;
	path[k++] = idx;

	for (ver = tg->list[idx].next; ver != NULL; ver = ver->next)
	{
		DFS(search_vertex(ver->s));
	}

	k --;				// leave the path
	visited[idx] = 2;	// everything reachable from here has been explored

	return 1;
}


int search_for_cycle(int idx) 	//从idx下标节点开始检测是否存在环
{
	struct vertex *ver = &tg->list[idx];
	visited[idx] = 1;
	k = 0;
	path[k++] = idx;

	while (ver->next != NULL) 
	{
		int i = 0;
		for (i = 0;i < tg->num;i ++)
		{
			if (i == idx) continue;
			
			visited[i] = 0;
		}

		for (i = 1;i <= MAX;i ++) 
		{
			path[i] = -1;
		}
		k = 1;

		DFS(search_vertex(ver->next->s));
		ver = ver->next;
	}

}


#if 0	//图的测试用例
int main() {


	tg = (struct task_graph*)malloc(sizeof(struct task_graph));
	tg->num = 0;

	struct source_type v1;
	v1.id = 1;
	v1.type = PROCESS;
	add_vertex(v1);

	struct source_type v2;
	v2.id = 2;
	v2.type = PROCESS;
	add_vertex(v2);

	struct source_type v3;
	v3.id = 3;
	v3.type = PROCESS;
	add_vertex(v3);

	struct source_type v4;
	v4.id = 4;
	v4.type = PROCESS;
	add_vertex(v4);

	
	struct source_type v5;
	v5.id = 5;
	v5.type = PROCESS;
	add_vertex(v5);


	add_edge(v1, v2);
	add_edge(v2, v3);
	add_edge(v3, v4);
	add_edge(v4, v5);
	add_edge(v3, v1);
	
	search_for_cycle(search_vertex(v1));

}
#endif


#endif







// Run one detection pass: clear the `deadlock` flag, then search for a cycle
// from every vertex in turn, stopping as soon as one is flagged. Prints
// "no deadlock" when the pass finds nothing.
//
// NOTE(review): the graph is read here without any locking while the mutex
// hooks may be modifying it from other threads.
void check_dead_lock(void)
{
	int i = 0;

	deadlock = 0;
	for (i = 0;i < tg->num && deadlock == 0;i ++)
	{
		search_for_cycle(i);
	}

	if (deadlock == 0)
		printf("no deadlock\n");
}


// Body of the detector thread: sleeps five seconds, runs check_dead_lock(),
// and repeats forever. `args` is not used.
static void *thread_routine(void *args)
{
	for (;;)
	{
		sleep(5);
		check_dead_lock();
	}
}


// Allocate the global graph `tg` and start the background detector thread.
//
// The graph is zero-initialized, so the vertex count, the lock-list bound and
// every lock record (including its degress counter, which unlock_after() reads)
// start at 0. The process exits when the graph cannot be allocated, because
// the hooks cannot work without it. When the detector thread cannot be
// created an error is printed and the function returns without periodic
// checking.
void start_check(void)
{
	pthread_t tid;
	int rc;

	tg = calloc(1, sizeof *tg);
	if (tg == NULL)
	{
		perror("start_check: calloc");
		exit(EXIT_FAILURE);
	}

	rc = pthread_create(&tid, NULL, thread_routine, NULL);
	if (rc != 0)
	{
		fprintf(stderr, "start_check: pthread_create failed (error %d)\n", rc);
		return;
	}

	// The detector never terminates and is never joined.
	pthread_detach(tid);
}


#if 1

// Find the lock-list record whose lock_id equals `lock`.
// Returns the record's index, or -1 when the lock is not in the list.
int search_lock(uint64 lock)
{
	int slot = 0;

	while (slot < tg->lockidx)
	{
		if (tg->locklist[slot].lock_id == lock)
			return slot;

		slot++;
	}

	return -1;
}

// Find a free slot in the lock list: the first record below tg->lockidx whose
// lock_id is 0, or tg->lockidx itself when there is none. `lock` is not used.
int search_empty_lock(uint64 lock)
{
	int slot;

	(void)lock;

	for (slot = 0; slot < tg->lockidx; ++slot)
	{
		if (tg->locklist[slot].lock_id != 0)
			continue;

		return slot;
	}

	return tg->lockidx;
}

#endif

// Atomically add `add` to *value and return the value *value held before the
// addition.
//
// Uses the compiler's atomic builtin rather than hand-written x86 `lock xaddl`
// assembly, so the function also builds on other architectures and the
// compiler knows that *value is modified.
int inc(int *value, int add)
{
	return __atomic_fetch_add(value, add, __ATOMIC_SEQ_CST);
}


// Dump every lock-list record below tg->lockidx as "threadid : T, lockid: L",
// one record per line, framed by separator lines.
void print_locklist(void)
{
	int i = 0;

	printf("print_locklist: \n");
	printf("---------------------\n");
	for (i = 0;i < tg->lockidx;i ++)
	{
		// Both ids are unsigned long values, hence %lu.
		printf("threadid : %lu, lockid: %lu\n", tg->locklist[i].id, tg->locklist[i].lock_id);
	}
	printf("---------------------\n\n\n");
}

// Bookkeeping done just before `thread_id` tries to take the lock `lockaddr`.
//
// For every lock-list record whose lock_id equals `lockaddr`, both the calling
// thread and the thread recorded for that lock are registered as vertices, the
// record's degress counter is incremented, and an edge
// thread_id -> recorded thread is added unless verify_edge() already reports it.
void lock_before(uint64 thread_id, uint64 lockaddr)
{
	int slot;

	for (slot = 0; slot < tg->lockidx; slot++)
	{
		struct source_type waiter;
		struct source_type holder;

		if (tg->locklist[slot].lock_id != lockaddr)
			continue;	// record belongs to another lock

		waiter.id = thread_id;
		waiter.type = PROCESS;
		add_vertex(waiter);

		holder.id = tg->locklist[slot].id;
		holder.type = PROCESS;
		tg->locklist[slot].degress++;
		add_vertex(holder);

		if (verify_edge(waiter, holder))
			continue;	// edge already recorded

		add_edge(waiter, holder);
	}
}

// Bookkeeping done right after `thread_id` has taken the lock `lockaddr`.
//
// Lock not in the list: record (thread_id, lockaddr) in a free slot with a
// degress count of 0. tg->lockidx grows only when the record is appended at
// the end; reusing a cleared slot leaves it unchanged, so repeated lock/unlock
// cycles cannot push it past the MAX-entry table. When the table is full the
// lock is left untracked and a warning is printed.
//
// Lock already in the list: decrement the record's degress counter, remove the
// edge thread_id -> recorded thread if it exists, and make `thread_id` the
// recorded owner.
void lock_after(uint64 thread_id, uint64 lockaddr)
{
	int idx = search_lock(lockaddr);

	if (idx == -1)
	{
		int eidx = search_empty_lock(lockaddr);

		if (eidx >= MAX)
		{
			fprintf(stderr, "lock_after: lock list full, lock %lu is not tracked\n", lockaddr);
			return;
		}

		tg->locklist[eidx].id = thread_id;
		tg->locklist[eidx].lock_id = lockaddr;
		tg->locklist[eidx].degress = 0;	// unlock_after() reads this counter

		if (eidx == tg->lockidx)
			inc(&tg->lockidx, 1);
	}
	else
	{
		struct source_type from;
		from.id = thread_id;
		from.type = PROCESS;

		struct source_type to;
		to.id = tg->locklist[idx].id;
		to.type = PROCESS;
		tg->locklist[idx].degress --;

		if (verify_edge(from, to))	// the wait is over: drop the edge
			remove_edge(from, to);

		tg->locklist[idx].id = thread_id;	// the lock now belongs to this thread
	}
}

// Bookkeeping done right after the lock `lockaddr` has been released.
//
// When the lock has a record whose degress counter is 0, the record is cleared
// (thread id and lock id set to 0) so the slot can be reused. A lock that has
// no record is ignored. `thread_id` is not used.
void unlock_after(uint64 thread_id, uint64 lockaddr)
{
	int idx = search_lock(lockaddr);

	(void)thread_id;

	if (idx == -1)
		return;		// untracked lock: locklist[-1] must not be touched

	if (tg->locklist[idx].degress == 0)
	{
		tg->locklist[idx].id = 0;
		tg->locklist[idx].lock_id = 0;
	}
}



int pthread_mutex_lock(pthread_mutex_t *mutex) 	//加锁的hook方法
{
    pthread_t selfid = pthread_self(); //
    
	lock_before(selfid, (uint64)mutex);
    pthread_mutex_lock_f(mutex);
	lock_after(selfid, (uint64)mutex);

}

int pthread_mutex_unlock(pthread_mutex_t *mutex) 	//解锁的hook方法
{
	pthread_t selfid = pthread_self();

    pthread_mutex_unlock_f(mutex);
	unlock_after(selfid, (uint64)mutex);


}

// Resolve the real pthread_mutex_lock()/pthread_mutex_unlock() with
// dlsym(RTLD_NEXT, ...) and store them in pthread_mutex_lock_f and
// pthread_mutex_unlock_f, which the hooks call.
//
// Returns 0 when both symbols were resolved, -1 otherwise.
static int init_hook()
{
	pthread_mutex_lock_f = dlsym(RTLD_NEXT, "pthread_mutex_lock");
	pthread_mutex_unlock_f = dlsym(RTLD_NEXT, "pthread_mutex_unlock");

	if (pthread_mutex_lock_f == NULL || pthread_mutex_unlock_f == NULL)
		return -1;

	return 0;
}



#if 0  //debug

// Four mutexes for the debug scenario; the four test threads below lock them
// in a ring (1 then 2, 2 then 3, 3 then 4, 4 then 1).
pthread_mutex_t mutex_1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex_2 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex_3 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex_4 = PTHREAD_MUTEX_INITIALIZER;

// Debug thread 1: prints its thread id, locks mutex_1, sleeps one second
// (presumably so the sibling threads can take their first mutex), then locks
// mutex_2 and releases both. `args` is not used. Returns NULL.
void *thread_rountine_1(void *args)
{
	pthread_t selfid = pthread_self();

	printf("thread_routine 1 : %lu \n", (unsigned long)selfid);

	pthread_mutex_lock(&mutex_1);
	sleep(1);
	pthread_mutex_lock(&mutex_2);

	pthread_mutex_unlock(&mutex_2);
	pthread_mutex_unlock(&mutex_1);

	return (void *)(0);
}

// Debug thread 2: prints its thread id, locks mutex_2, sleeps one second
// (presumably so the sibling threads can take their first mutex), then locks
// mutex_3 and releases both. `args` is not used. Returns NULL.
void *thread_rountine_2(void *args)
{
	pthread_t selfid = pthread_self();

	printf("thread_routine 2 : %lu \n", (unsigned long)selfid);

	pthread_mutex_lock(&mutex_2);
	sleep(1);
	pthread_mutex_lock(&mutex_3);

	pthread_mutex_unlock(&mutex_3);
	pthread_mutex_unlock(&mutex_2);

	return (void *)(0);
}

// Debug thread 3: prints its thread id, locks mutex_3, sleeps one second
// (presumably so the sibling threads can take their first mutex), then locks
// mutex_4 and releases both. `args` is not used. Returns NULL.
void *thread_rountine_3(void *args)
{
	pthread_t selfid = pthread_self();

	printf("thread_routine 3 : %lu \n", (unsigned long)selfid);

	pthread_mutex_lock(&mutex_3);
	sleep(1);
	pthread_mutex_lock(&mutex_4);

	pthread_mutex_unlock(&mutex_4);
	pthread_mutex_unlock(&mutex_3);

	return (void *)(0);
}

// Debug thread 4: prints its thread id, locks mutex_4, sleeps one second
// (presumably so the sibling threads can take their first mutex), then locks
// mutex_1 and releases both. `args` is not used. Returns NULL.
void *thread_rountine_4(void *args)
{
	pthread_t selfid = pthread_self();

	printf("thread_routine 4 : %lu \n", (unsigned long)selfid);

	pthread_mutex_lock(&mutex_4);
	sleep(1);
	pthread_mutex_lock(&mutex_1);

	pthread_mutex_unlock(&mutex_1);
	pthread_mutex_unlock(&mutex_4);

	return (void *)(0);
}


// Debug driver: installs the hooks, starts the detector, then runs the four
// test threads, which lock the four mutexes in a ring, and waits for them.
int main()
{
	init_hook();
	start_check();

	printf("start_check\n");

	pthread_t tid1, tid2, tid3, tid4;
	pthread_create(&tid1, NULL, thread_rountine_1, NULL);
	pthread_create(&tid2, NULL, thread_rountine_2, NULL);
	pthread_create(&tid3, NULL, thread_rountine_3, NULL);
	pthread_create(&tid4, NULL, thread_rountine_4, NULL);

	pthread_join(tid1, NULL);
	pthread_join(tid2, NULL);
	pthread_join(tid3, NULL);
	pthread_join(tid4, NULL);

	return 0;
}

#endif