本博客梳理红黑树基本性质与底层原理
一、红黑树的概念
红黑树是一棵二叉搜索树,每个节点的颜色不是红色就是黑色。通过对任意一条从根到叶子(NULL节点)路径上各节点的颜色进行约束,红黑树确保没有一条路径比其他路径长出2倍(即最长路径的长度不超过最短路径的2倍),因而是接近平衡的。
1.红黑树的规则
(1)每个节点不是红色就是黑色
(2)根节点是黑色的
(3)任意一条路径不会有连续的红色节点
(4)对任意一个节点,从该节点到所有NULL节点的路径上,均包含相同数量的黑色节点
2.红黑树的效率:O(logN)
思考:红黑树如何确保“没有一条路径比其他路径长出2倍”?
根据规则(4),从根节点到所有NULL节点的路径上都包含相同数量的黑色节点,记这个数量为bh。最短路径的极端情况是路径上全都是黑色节点,长度为bh;而根据规则(3),一条路径上不会有连续的红色节点,所以最长路径的极端情况是在这bh个黑色节点之间嵌入红色节点,红黑相间,长度为2*bh。因此任意一条路径的长度x都满足bh <= x <= 2*bh,最长路径不超过最短路径的2倍。(实际的树中不一定同时存在全黑的最短路径和红黑相间的最长路径,这两者只是理论上的极端情况。)
二、红黑树底层原理及代码模拟实现
1.红黑树的结构
为了与后面STL容器中的结构相呼应,这里采用键值对型存储方式,即存储pair类型,与后续map/set保持一致。实际上STL中map和set底层的数据结构就是红黑树(哈希表对应的容器是unordered_map/unordered_set,其底层并不是红黑树)
// Colour tag carried by every red-black tree node; a node is exactly one of the two.
enum Color
{
	RED = 0,
	BLACK = 1
};
// One node of the red-black tree: a key/value pair, a colour, and three links
// (left child, right child, parent). The parent link is what lets rotations and
// the upward recolouring walk work without an explicit stack.
// A freshly constructed node is BLACK with all links null; whoever inserts it is
// responsible for recolouring and linking it.
template<class K, class V>
struct RBTreeNode
{
	std::pair<K, V> _kv;
	Color _col = BLACK;
	RBTreeNode* _left = nullptr;
	RBTreeNode* _right = nullptr;
	RBTreeNode* _parent = nullptr;

	// Stores a copy of `kv`; colour and links take their defaults above.
	RBTreeNode(const std::pair<K, V>& kv)
		: _kv(kv)
	{ }
};
2.红黑树的插入
通过学习红黑树的插入过程,可以了解到其底层如何保证树的相对平衡,提高搜索效率
插入过程:
a.按二叉搜索树规则进行插入
b.向非空树中插入时,新增节点一定是红色(向空树中插入时,新增节点就是根,根据规则(2)必须为黑色)。因为新增节点为黑色会破坏“对任意一个节点,从该节点到所有NULL节点的路径上,均包含相同数量的黑色节点”的规则,不好维护;而新增红色节点,如果父亲为黑色,则没有违反任何规则,插入结束;如果父亲为红色,则违反规则(3),此时爷爷必为黑色,只需要根据父亲的兄弟(uncle)的情况分类讨论
c.定义:新增节点为c(cur),c的父亲为p(parent),p的父亲为g(grandfather),p的兄弟为u(uncle)
各种情况汇总:
(1)变色
此处展示了parent == grandfather->_left的情况,parent == grandfather->_right的情况类似,下同。
在图中,cur插入在parent的左子树,出现了两个连续的红色节点。当u存在且为红时,仅需把p和u变黑,g变红即可:此子树每条路径上的黑色节点数量不变,依旧满足红黑树的约束条件。但g变红后可能与g的父亲形成连续的红色节点,因此需要把g当作新的cur继续向上更新;如果g就是根,最后再把它变回黑色即可。
(2)单旋+变色
此时cur插入在parent的左子树
如果遇到u为黑的情况,那么单纯变色就没法解决问题了。p变为黑色后,不管g怎么变,都会导致g的两条分路黑色节点数量发生变化,违反规则(4),因此需要旋转来介入,确保这棵子树根节点两条分路黑色节点数量在插入前后保持一致
(3)双旋+变色
此时cur插入在parent的右子树
和(2)一样,单纯的变色无法解决问题,需要介入旋转才能保证红黑树的规则。
//new了一个节点并与parent完成连接之后,执行此逻辑
while (parent && parent->_col == RED)//父亲节点是黑的则说明可以结束更新
{
Node* grandfather = parent->_parent;
Node* uncle;
if (parent == grandfather->_left)
{
uncle = grandfather->_right;
//(1)变色:c为红,p为红,g为黑,u存在且为红
if (uncle && uncle->_col == RED)
{
parent->_col = BLACK;
uncle->_col = BLACK;
grandfather->_col = RED;
//继续向上处理
cur = grandfather;
parent = grandfather->_parent;
}
//(2)旋转+变色:c为红,p为红,g为黑,u不存在或u为黑,不需要继续向上处理
else
{
//单旋+变色
// g
// p u
// c
if (cur == parent->_left)
{
RotateR(grandfather);
parent->_col = BLACK;
grandfather->_col = RED;
}
//双旋+变色:c为红,p为红,g为黑,u不存在且u为黑
// g
// p u
// c
else
{
RotateL(parent);
RotateR(grandfather);
cur->_col = BLACK;
grandfather->_col = RED;
}
}
}
else//parent == grandfather->_right
{
uncle = grandfather->_left;
//(1)变色:c为红,p为红,g为黑,u存在且为红
if (uncle && uncle->_col == RED)
{
parent->_col = BLACK;
uncle->_col = BLACK;
grandfather->_col = RED;
//继续向上处理
cur = grandfather;
parent = grandfather->_parent;
}
//(2)旋转+变色:c为红,p为红,g为黑,u不存在且u为黑,不需要继续向上处理
else
{
//单旋+变色
// g
// u p
// c
if (cur == parent->_right)
{
RotateL(grandfather);
parent->_col = BLACK;
grandfather->_col = RED;
}
//双旋+变色:c为红,p为红,g为黑,u不存在且u为黑
// g
// u p
// c
else
{
RotateR(parent);
RotateL(grandfather);
cur->_col = BLACK;
grandfather->_col = RED;
}
}
}
}
3.红黑树的验证
- 对于规则(1),枚举就已经保证了节点颜色不是红色就是黑色;对于规则(2),检查根节点即可,因此检查的重点应当放在规则(3)和规则(4)上。
- 检查函数设计逻辑:在外层先算出一条路径(例如最左路径)上黑色节点的数量,作为参考值refNum传入函数;在函数中递归遍历所有路径,每走到一个NULL节点就把该路径上统计出来的黑色节点数量与refNum比较,不一致则说明违反规则(4);在遍历过程中,还要同时检查当前节点及其父亲是否都是红色,若都是红色则说明违反规则(3)
// Recursively verifies rules (3) and (4) for the subtree rooted at `root`.
//   root     - node being visited; nullptr marks the end of a path
//   blackNum - number of BLACK nodes on the path from the tree root down to,
//              but not including, `root`
//   refNum   - number of BLACK nodes every root-to-NULL path is expected to have
// Returns true if no violation is found in this subtree; otherwise prints a
// diagnostic and returns false.
bool check(Node* root, int blackNum, const int refNum)
{
	if (root == nullptr)
	{
		// End of one path. Rule (4): its black count must match the reference.
		if (blackNum != refNum)
		{
			cout << "规则已经违反,路径上黑色节点数量不一致" << endl;
			return false;
		}
		return true;
	}
	// Rule (3): no RED node may have a RED parent. Looking at the parent (one
	// link) is simpler than looking at both children. The tree root has no
	// parent, so the pointer must be tested before it is dereferenced.
	if (root->_col == RED && root->_parent && root->_parent->_col == RED)
	{
		cout << "规则已经违反,存在连续的红色节点" << endl;
		return false;
	}
	// blackNum is passed by value, so each path keeps its own running count.
	if (root->_col == BLACK)
		blackNum++;
	return check(root->_left, blackNum, refNum)
		&& check(root->_right, blackNum, refNum);
}
至此,红黑树核心原理及代码模拟已经全部梳理完毕。与上一篇博客4.【C++进阶】AVL树一样,下面给出一份更为完善的红黑树模拟实现代码,增加了查找、中序遍历、平衡检测、求高度、求节点数量等功能,并提供测试代码,供读者参考。
//RBTree.h
#pragma once
#include<iostream>
using namespace std;
// Colour tag carried by every red-black tree node; a node is exactly one of the two.
enum Color
{
	RED = 0,
	BLACK = 1
};
// One node of the red-black tree: a key/value pair, a colour, and three links
// (left child, right child, parent). The parent link is what lets rotations and
// the upward recolouring walk work without an explicit stack.
// A freshly constructed node is BLACK with all links null; RBTree::Insert
// recolours non-root nodes to RED and links them itself.
template<class K, class V>
struct RBTreeNode
{
	std::pair<K, V> _kv;
	Color _col = BLACK;
	RBTreeNode* _left = nullptr;
	RBTreeNode* _right = nullptr;
	RBTreeNode* _parent = nullptr;

	// Stores a copy of `kv`; colour and links take their defaults above.
	RBTreeNode(const std::pair<K, V>& kv)
		: _kv(kv)
	{ }
};
template<class K, class V>
class RBTree
{
typedef RBTreeNode<K, V> Node;
public:
bool Insert(const pair<K, V>& kv)
{
if (_root == nullptr)
{
_root = new Node(kv);
return true;
}
//1.二叉搜索树的逻辑查找插入位置
Node* cur = _root;
Node* parent = nullptr;
while (cur)
{
if (kv.first < cur->_kv.first)
{
parent = cur;
cur = cur->_left;
}
else if (kv.first > cur->_kv.first)
{
parent = cur;
cur = cur->_right;
}
else
return false;
}
cur = new Node(kv);
cur->_col = RED;
if (kv.first < parent->_kv.first)
parent->_left = cur;
else
parent->_right = cur;
cur->_parent = parent;
//2.找到后,分情况讨论
while (parent && parent->_col == RED)
{
Node* grandfather = parent->_parent;
Node* uncle;
if (parent == grandfather->_left)
{
uncle = grandfather->_right;
//(1)变色:c为红,p为红,g为黑,u存在且为红
if (uncle && uncle->_col == RED)
{
parent->_col = BLACK;
uncle->_col = BLACK;
grandfather->_col = RED;
//继续向上处理
cur = grandfather;
parent = grandfather->_parent;
}
//(2)旋转+变色:c为红,p为红,g为黑,u不存在或u为黑,不需要继续向上处理
else
{
//单旋+变色
// g
// p u
// c
if (cur == parent->_left)
{
RotateR(grandfather);
parent->_col = BLACK;
grandfather->_col = RED;
}
//双旋+变色:c为红,p为红,g为黑,u不存在且u为黑
// g
// p u
// c
else
{
RotateL(parent);
RotateR(grandfather);
cur->_col = BLACK;
grandfather->_col = RED;
}
}
}
else//parent == grandfather->_right
{
uncle = grandfather->_left;
//(1)变色:c为红,p为红,g为黑,u存在且为红
if (uncle && uncle->_col == RED)
{
parent->_col = BLACK;
uncle->_col = BLACK;
grandfather->_col = RED;
//继续向上处理
cur = grandfather;
parent = grandfather->_parent;
}
//(2)旋转+变色:c为红,p为红,g为黑,u不存在且u为黑,不需要继续向上处理
else
{
//单旋+变色
// g
// u p
// c
if (cur == parent->_right)
{
RotateL(grandfather);
parent->_col = BLACK;
grandfather->_col = RED;
}
//双旋+变色:c为红,p为红,g为黑,u不存在且u为黑
// g
// u p
// c
else
{
RotateR(parent);
RotateL(grandfather);
cur->_col = BLACK;
grandfather->_col = RED;
}
}
}
}
_root->_col = BLACK;
return true;
}
void InOrder()
{
_InOrder(_root);
cout << endl;
}
bool IsBalance()
{
return _IsBalance();
}
int Height()
{
return _Height(_root);
}
int Size()
{
return _Size(_root);
}
Node* Find(const K& key)
{
Node* cur = _root;
while (cur)
{
if (cur->_kv.first < key)
{
cur = cur->_right;
}
else if (cur->_kv.first > key)
{
cur = cur->_left;
}
else
{
return cur;
}
}
return nullptr;
}
private:
int _Height(Node* root)
{
if (root == nullptr)
return 0;
int leftHeight = _Height(root->_left);
int rightHeight = _Height(root->_right);
return leftHeight > rightHeight ? leftHeight + 1 : rightHeight + 1;
}
int _Size(Node* root)
{
if (root == nullptr)
return 0;
return _Size(root->_left) + _Size(root->_right) + 1;
}
bool check(Node* root, int blackNum, const int refNum)
{
if (root == nullptr)
{
//规则4:每条路径上黑色节点的数量要一致
//判断blackNum和refNum的关系
if (blackNum != refNum)
{
cout << "规则已经违反,路径上黑色节点数量不一致" << endl;
return false;
}
return true;
}
//规则3:不能有连续的红色节点
if (root->_col == RED && root->_parent->_col == RED)
{
cout << "规则已经违反,存在连续的红色节点" << endl;
return false;
}
if (root->_col == BLACK)
blackNum++;
return check(root->_left, blackNum, refNum)
&& check(root->_right, blackNum, refNum);
}
bool _IsBalance()
{
if (_root == nullptr)
return true;
if (_root->_col == RED)
return false;
int refNum = 0;
//先求一个refNum作为参考值,然后在check函数中进行比对
Node* cur = _root;
while (cur)
{
if (cur->_col == BLACK)
refNum++;
cur = cur->_left;
}
return check(_root, 0, refNum);
}
void _InOrder(Node* root)
{
if (root == nullptr)
return;
_InOrder(root->_left);
cout << root->_kv.first << " ";
_InOrder(root->_right);
}
void RotateR(Node* parent)
{
Node* subL = parent->_left;
Node* subLR = subL->_right;
Node* parentParent = parent->_parent;
if(subLR)
subLR->_parent = parent;
parent->_left = subLR;
subL->_right = parent;
parent->_parent = subL;
if (parent == _root)
{
_root = subL;
subL->_parent = nullptr;
}
else
{
if (parent == parentParent->_left)
parentParent->_left = subL;
else
parentParent->_right = subL;
subL->_parent = parentParent;
}
}
void RotateL(Node* parent)
{
Node* subR = parent->_right;
Node* subRL = subR->_left;
Node* parentParent = parent->_parent;
if (subRL)
subRL->_parent = parent;
parent->_right = subRL;
subR->_left = parent;
parent->_parent = subR;
if (parent == _root)
{
_root = subR;
subR->_parent = nullptr;
}
else
{
if (parent == parentParent->_left)
parentParent->_left = subR;
else
parentParent->_right = subR;
subR->_parent = parentParent;
}
}
private:
Node* _root = nullptr;
};
//test.cpp
#define _CRT_SECURE_NO_WARNINGS 1
#include"RBTree.h"
#include<cstdlib>
#include<ctime>
#include<utility>
#include<vector>
void TestRBTree1()
{
RBTree<int, int> t;
// 常规的测试用例
// int a[] = { 16, 3, 7, 11, 9, 26, 18, 14, 15 };
// 特殊的带有双旋场景的测试用例
int a[] = { 4, 2, 6, 1, 3, 5, 15, 7, 16, 14 };
for (auto e : a)
{
t.Insert({ e, e });
//t.InOrder();
//cout << "Insert:" << e << "->" << t.IsBalanceTree() << endl;
}
t.InOrder();
cout << t.IsBalance() << endl;
}
// 插入一堆随机值,测试平衡,顺便测试一下高度和性能等
void TestRBTree2()
{
const int N = 1000000;
vector<int> v;
v.reserve(N);
srand(time(0));
for (size_t i = 0; i < N; i++)
{
v.push_back(rand() + i);
}
size_t begin2 = clock();
RBTree<int, int> t;
for (auto e : v)
{
t.Insert(make_pair(e, e));
}
size_t end2 = clock();
cout << "Insert:" << end2 - begin2 << endl;
cout << t.IsBalance() << endl;
cout << "Height:" << t.Height() << endl;
cout << "Size:" << t.Size() << endl;
size_t begin1 = clock();
// 确定在的值
/*for (auto e : v)
{
t.Find(e);
}*/
// 随机值
for (size_t i = 0; i < N; i++)
{
t.Find((rand() + i));
}
size_t end1 = clock();
cout << "Find:" << end1 - begin1 << endl;
cout << t.IsBalance() << endl;
}
// Program entry point: runs the randomized stress test.
int main()
{
	TestRBTree2();
}