文章目录
死锁的四大条件与预防策略详解
死锁(Deadlock)是指多个线程或进程因相互等待资源而永久阻塞的现象。
一、死锁的产生条件(四个必要条件)
- 互斥条件:资源不能被多个线程同时占用。
- 占有且等待:一个线程至少持有一个资源,同时等待其他线程所持有的资源。
- 不可剥夺:线程持有的资源在未使用完之前不能被强制剥夺。
- 循环等待:存在一个线程等待链,链中的每个线程都在等待下一个线程所持有的资源。
只要四个条件同时满足,就可能发生死锁。
二、代码示例
#include <iostream>
#include <thread>
#include <mutex>
#include <chrono>
// Two global mutexes shared by the worker threads of both demos below
// (deadlock_version and no_deadlock_version).
std::mutex mutexA;
std::mutex mutexB;
void deadlock_version() {
std::thread t1([] {
std::lock_guard<std::mutex> lockA(mutexA);
std::cout << "[T1] Locked mutexA\n";
std::this_thread::sleep_for(std::chrono::milliseconds(100));
std::lock_guard<std::mutex> lockB(mutexB); // 等待 mutexB
std::cout << "[T1] Locked mutexB\n";
});
std::thread t2([] {
std::lock_guard<std::mutex> lockB(mutexB);
std::cout << "[T2] Locked mutexB\n";
std::this_thread::sleep_for(std::chrono::milliseconds(100));
std::lock_guard<std::mutex> lockA(mutexA); // 等待 mutexA
std::cout << "[T2] Locked mutexA\n";
});
t1.join();
t2.join();
}
void no_deadlock_version() {
std::thread t1([] {
std::unique_lock<std::mutex> lockA(mutexA, std::defer_lock);
std::unique_lock<std::mutex> lockB(mutexB, std::defer_lock);
std::lock(lockA, lockB); // 同时加锁,避免死锁
std::cout << "[T1] Locked mutexA and mutexB safely\n";
});
std::thread t2([] {
std::unique_lock<std::mutex> lockA(mutexA, std::defer_lock);
std::unique_lock<std::mutex> lockB(mutexB, std::defer_lock);
std::lock(lockA, lockB); // 同样顺序加锁
std::cout << "[T2] Locked mutexA and mutexB safely\n";
});
t1.join();
t2.join();
}
// Entry point: runs the deadlock demonstration.
//
// deadlock_version() joins two worker threads that are expected to block each
// other forever, so this call normally never returns; interrupt the program
// (Ctrl+C, or from inside gdb) to inspect the stuck threads.
//
// To see the deadlock-free variant, replace the call below with
// no_deadlock_version(). It cannot simply be run afterwards in the same
// process: the stuck workers still own mutexA and mutexB, so any later attempt
// to lock them would hang as well.
int main() {
    std::cout << "1. 死锁版本开始:\n";
    deadlock_version();
    return 0;
}
🧪 输出示例:
1. 死锁版本开始:
[T1] Locked mutexA
[T2] Locked mutexB
(此时死锁,程序卡住,不会继续往下执行;下面第 2 部分是在 main 中改为单独调用 no_deadlock_version() 时的输出)
2. 无死锁版本开始:
[T1] Locked mutexA and mutexB safely
[T2] Locked mutexA and mutexB safely
🛠 如何调试死锁(gdb 示例)
g++ -g deadlock_example.cpp -o deadlock_example -pthread
gdb ./deadlock_example
(gdb) run
(程序卡住时)<ctrl + c>
(gdb) thread apply all bt
(gdb) run
Starting program: /.../a.out
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
1. 死锁版本开始:
[New Thread 0x7ffff77ff6c0 (LWP 414762)]
[T1] Locked mutexA
[New Thread 0x7ffff6ffe6c0 (LWP 414763)]
[T2] Locked mutexB
^C
Thread 1 "a.out" received signal SIGINT, Interrupt.
Download failed: Invalid argument. Continuing without source file ./nptl/./nptl/futex-internal.c.
0x00007ffff7898d71 in __futex_abstimed_wait_common64 (private=128, cancel=true, abstime=0x0, op=265, expected=414762, futex_word=0x7ffff77ff990) at ./nptl/futex-internal.c:57
warning: 57 ./nptl/futex-internal.c: No such file or directory
(gdb) info threads
Id Target Id Frame
* 1 Thread 0x7ffff7e8c740 (LWP 414759) "a.out" 0x00007ffff7898d71 in __futex_abstimed_wait_common64 (private=128, cancel=true, abstime=0x0, op=265, expected=414762,
futex_word=0x7ffff77ff990) at ./nptl/futex-internal.c:57
2 Thread 0x7ffff77ff6c0 (LWP 414762) "a.out" futex_wait (private=0, expected=2, futex_word=0x55555555b1a0 <mutex>) at ../sysdeps/nptl/futex-internal.h:146
3 Thread 0x7ffff6ffe6c0 (LWP 414763) "a.out" futex_wait (private=0, expected=2, futex_word=0x55555555b160 <mutexA>) at ../sysdeps/nptl/futex-internal.h:146
(gdb) thread apply all bt
Thread 3 (Thread 0x7ffff6ffe6c0 (LWP 414763) "a.out"):
#0 futex_wait (private=0, expected=2, futex_word=0x55555555b160 <mutexA>) at ../sysdeps/nptl/futex-internal.h:146
#1 __GI___lll_lock_wait (futex=futex@entry=0x55555555b160 <mutexA>, private=0) at ./nptl/lowlevellock.c:49
#2 0x00007ffff78a0101 in lll_mutex_lock_optimized (mutex=0x55555555b160 <mutexA>) at ./nptl/pthread_mutex_lock.c:48
#3 ___pthread_mutex_lock (mutex=0x55555555b160 <mutexA>) at ./nptl/pthread_mutex_lock.c:93
#4 0x000055555555680b in __gthread_mutex_lock(pthread_mutex_t*) ()
#5 0x00005555555569be in std::mutex::lock() ()
#6 0x0000555555556a5c in std::lock_guard<std::mutex>::lock_guard(std::mutex&) ()
#7 0x00005555555554b9 in deadlock_version()::{lambda()#2}::operator()() const ()
#8 0x0000555555556705 in void std::__invoke_impl<void, deadlock_version()::{lambda()#2}>(std::__invoke_other, deadlock_version()::{lambda()#2}&&) ()
#9 0x00005555555565fb in std::__invoke_result<deadlock_version()::{lambda()#2}>::type std::__invoke<deadlock_version()::{lambda()#2}>(deadlock_version()::{lambda()#2}&&) ()
#10 0x00005555555564d6 in void std::thread::_Invoker<std::tuple<deadlock_version()::{lambda()#2}> >::_M_invoke<0ul>(std::_Index_tuple<0ul>) ()
#11 0x0000555555556436 in std::thread::_Invoker<std::tuple<deadlock_version()::{lambda()#2}> >::operator()() ()
#12 0x00005555555563be in std::thread::_State_impl<std::thread::_Invoker<std::tuple<deadlock_version()::{lambda()#2}> > >::_M_run() ()
#13 0x00007ffff7cecdb4 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
#14 0x00007ffff789caa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
#15 0x00007ffff7929c3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
Thread 2 (Thread 0x7ffff77ff6c0 (LWP 414762) "a.out"):
#0 futex_wait (private=0, expected=2, futex_word=0x55555555b1a0 <mutex>) at ../sysdeps/nptl/futex-internal.h:146
#1 __GI___lll_lock_wait (futex=futex@entry=0x55555555b1a0 <mutex>, private=0) at ./nptl/lowlevellock.c:49
#2 0x00007ffff78a0101 in lll_mutex_lock_optimized (mutex=0x55555555b1a0 <mutex>) at ./nptl/pthread_mutex_lock.c:48
#3 ___pthread_mutex_lock (mutex=0x55555555b1a0 <mutex>) at ./nptl/pthread_mutex_lock.c:93
#4 0x000055555555680b in __gthread_mutex_lock(pthread_mutex_t*) ()
#5 0x00005555555569be in std::mutex::lock() ()
#6 0x0000555555556a5c in std::lock_guard<std::mutex>::lock_guard(std::mutex&) ()
#7 0x000055555555539d in deadlock_version()::{lambda()#1}::operator()() const ()
--Type <RET> for more, q to quit, c to continue without paging--
#8 0x0000555555556742 in void std::__invoke_impl<void, deadlock_version()::{lambda()#1}>(std::__invoke_other, deadlock_version()::{lambda()#1}&&) ()
#9 0x000055555555664e in std::__invoke_result<deadlock_version()::{lambda()#1}>::type std::__invoke<deadlock_version()::{lambda()#1}>(deadlock_version()::{lambda()#1}&&) ()
#10 0x0000555555556502 in void std::thread::_Invoker<std::tuple<deadlock_version()::{lambda()#1}> >::_M_invoke<0ul>(std::_Index_tuple<0ul>) ()
#11 0x0000555555556452 in std::thread::_Invoker<std::tuple<deadlock_version()::{lambda()#1}> >::operator()() ()
#12 0x00005555555563e2 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<deadlock_version()::{lambda()#1}> > >::_M_run() ()
#13 0x00007ffff7cecdb4 in ?? () from /lib/x86_64-linux-gnu/libstdc++.so.6
#14 0x00007ffff789caa4 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:447
#15 0x00007ffff7929c3c in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:78
Thread 1 (Thread 0x7ffff7e8c740 (LWP 414759) "a.out"):
#0 0x00007ffff7898d71 in __futex_abstimed_wait_common64 (private=128, cancel=true, abstime=0x0, op=265, expected=414762, futex_word=0x7ffff77ff990) at ./nptl/futex-internal.c:57
#1 __futex_abstimed_wait_common (cancel=true, private=128, abstime=0x0, clockid=0, expected=414762, futex_word=0x7ffff77ff990) at ./nptl/futex-internal.c:87
#2 __GI___futex_abstimed_wait_cancelable64 (futex_word=futex_word@entry=0x7ffff77ff990, expected=414762, clockid=clockid@entry=0, abstime=abstime@entry=0x0, private=private@entry=128) at ./nptl/futex-internal.c:139
#3 0x00007ffff789e7a3 in __pthread_clockjoin_ex (threadid=140737345746624, thread_return=0x0, clockid=0, abstime=0x0, block=<optimized out>) at ./nptl/pthread_join_common.c:102
#4 0x00007ffff7cece33 in std::thread::join() () from /lib/x86_64-linux-gnu/libstdc++.so.6
#5 0x000055555555559c in deadlock_version() ()
#6 0x00005555555558ee in main ()
会看到两个线程分别卡在获取 mutexB 和 mutexA 的地方,形成典型的死锁等待链。
🔍 解读 GDB 栈:
🧵 Thread 2
#6 std::lock_guard<std::mutex>::lock_guard(std::mutex&) ()
#7 deadlock_version()::{lambda()#1}::operator()() const ()
线程 2 正在尝试加锁 mutexB(GDB 输出中该符号显示为 mutex,对应示例代码中的 mutexB):
futex_word=0x55555555b1a0 <mutex>
说明线程 2(lambda #1,即 T1)已持有 mutexA,并被阻塞在 mutexB 上。
🧵 Thread 3
#6 std::lock_guard<std::mutex>::lock_guard(std::mutex&) ()
#7 deadlock_version()::{lambda()#2}::operator()() const ()
线程 3 正在尝试加锁 mutexA:
futex_word=0x55555555b160 <mutexA>
说明线程 3(lambda #2,即 T2)已持有 mutexB,并被阻塞在 mutexA 上。
🧵 Thread 1(主线程)
#4 std::thread::join()
#5 deadlock_version()
主线程正在等待 thread 2 和 thread 3 退出,因此处于阻塞中。
通过 gdb 的 thread apply all bt 可以看到:
- 两个线程分别阻塞在 mutexB(GDB 中显示为 mutex)和 mutexA 上。
- 推测这两个锁正被对方线程持有(对照代码中的加锁顺序即可确认)。
- 主线程卡在 join(),等待死锁线程结束。
- 完全满足“循环等待”的死锁条件。
三、死锁的预防手段(以 C/C++ 为例)
1. 破坏“循环等待” —— 统一加锁顺序(推荐)
- 所有线程按固定顺序加锁,比如下面的示例中始终先加 m1 再加 m2。
// Mutexes shared by the prevention examples in this section.
std::mutex m1;
std::mutex m2;

// Locks m1, pauses 100 ms, then locks m2. Because every thread uses this same
// m1 -> m2 order, no thread can hold m2 while waiting for m1, so a circular
// wait cannot form.
void threadA() {
    std::lock_guard<std::mutex> outer{m1};
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
    std::lock_guard<std::mutex> inner{m2};  // always m1 before m2
    // critical section
}

// Uses the same m1 -> m2 order as threadA; taking m2 first here would
// reintroduce the deadlock.
void threadB() {
    std::lock_guard<std::mutex> outer{m1};  // never m2 before m1
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
    std::lock_guard<std::mutex> inner{m2};
}
2. 使用 std::lock
一次性加多个锁
void safe_function() {
std::unique_lock<std::mutex> lock1(m1, std::defer_lock);
std::unique_lock<std::mutex> lock2(m2, std::defer_lock);
std::lock(lock1, lock2); // 避免死锁
}
也可以使用 std::lock + std::lock_guard(配合 std::adopt_lock):
std::lock(mutex, mutexA);
std::lock_guard<std::mutex> lk1(mutex, std::adopt_lock);
std::lock_guard<std::mutex> lk2(mutexA, std::adopt_lock);
3. 使用 try_lock
实现非阻塞获取资源
// Attempts to take m1 and then m2 without ever blocking.
// If either mutex is busy the function backs off immediately, releasing
// whatever it already holds, so it can never take part in a circular wait.
// Each successful try_lock is adopted by a std::lock_guard so the mutex is
// released on every exit path, including an exception thrown from the
// critical section.
void try_lock_example() {
    if (!m1.try_lock()) {
        return;  // m1 is busy: give up without holding anything
    }
    std::lock_guard<std::mutex> hold1(m1, std::adopt_lock);

    if (!m2.try_lock()) {
        return;  // m2 is busy: hold1's destructor releases m1
    }
    std::lock_guard<std::mutex> hold2(m2, std::adopt_lock);

    // Both locks acquired: critical section goes here.
    // On return hold2 releases m2 first, then hold1 releases m1.
}
4. 超时机制(std::timed_mutex,C++11 起)
// Mutex type that supports bounded waits via try_lock_for.
std::timed_mutex tmutex;

// Tries to acquire tmutex, waiting at most 100 ms instead of blocking
// indefinitely. On success the lock is adopted by a std::lock_guard so it is
// released on every exit path; on timeout the function simply returns.
void timed_lock_example() {
    if (!tmutex.try_lock_for(std::chrono::milliseconds(100))) {
        // Acquisition failed within the timeout: back off to avoid deadlock.
        return;
    }
    std::lock_guard<std::timed_mutex> hold(tmutex, std::adopt_lock);
    // Lock acquired: critical section goes here.
}
5. 添加超时检测或死锁检测逻辑(高级)
待补充~
四、死锁的定位手段
1. 代码审查:查看是否存在资源互相等待逻辑。
2. 日志排查:
- 添加锁操作日志(获取、释放时间、线程ID)。
- 利用 RAII 包装锁并记录日志。
// Scope guard that locks a mutex on construction and unlocks it on
// destruction, logging both events with a caller-supplied name.
//
// The "Locking" message is printed (and flushed with std::endl) BEFORE the
// potentially blocking lock() call, so when a thread hangs the last log line
// shows which lock it was trying to take.
struct LockLogger {
    std::mutex& m;     // the mutex being guarded (not owned)
    std::string name;  // label used in the log output

    // Logs the attempt, then blocks until `mutex` is acquired.
    // The member must be bound to the parameter `mutex`; writing m(m) would
    // bind the reference to itself and make the lock() call undefined.
    LockLogger(std::mutex& mutex, const std::string& n) : m(mutex), name(n) {
        std::cout << "Locking " << name << std::endl;
        m.lock();
    }

    // Logs the release, then unlocks the mutex.
    ~LockLogger() {
        std::cout << "Unlocking " << name << std::endl;
        m.unlock();
    }

    // Non-copyable: a copy would unlock the same mutex a second time.
    LockLogger(const LockLogger&) = delete;
    LockLogger& operator=(const LockLogger&) = delete;
};
3. 使用工具定位
- gdb + thread apply all bt:查看线程调用栈,分析是否互相等待。
- pstack <pid>:打印所有线程栈。
- strace -p <pid>:查看系统调用是否卡在 futex(fast userspace mutex,用户态互斥)上。
- valgrind 的 helgrind 模块:检测数据竞争和死锁(加锁顺序不一致)。
valgrind --tool=helgrind ./your_program
4. 打印锁地址对应变量
// Print the addresses of the two demo mutexes so they can be matched against
// the futex_word addresses shown in the GDB backtrace.
std::cout << &mutexB << std::endl;
std::cout << &mutexA << std::endl;
运行时查看地址是否和 GDB 中的 0x55555555b1a0 和 0x55555555b160 一致,可进一步确认锁对象(注意:需要在 gdb 内运行或关闭 ASLR,否则每次运行的地址可能不同)。
五、小结
内容 | 说明 |
---|---|
死锁四条件 | 互斥、占有且等待、不可剥夺、循环等待 |
常见预防方式 | 固定加锁顺序、std::lock 、try_lock 、timed_mutex |
定位手段 | 日志追踪、gdb/pstack/valgrind、调试器分析调用栈 |