C++ 用哈希表封装unordered_set/unordered

前言

一、源码结构分析

二、模拟实现unordered_set/unordered_map

2.1 套上KeyOfT

2.2 普通迭代器

2.3 const迭代器

2.4 解决Key不能修改的问题

2.5 unordered_map的[]实现

三、unordered_set/unordered_map以及哈希表源码

前言

本篇文章我们将使用哈希表来封装unordered_set/unordered_map。但是大家要注意，在实现前一定要先保证自己的哈希表是能跑才可以来封装，而且我们的哈希表解决冲突的方式是哈希桶，并不是线性探测法。

一、源码结构分析

SGI的3.0版本源代码中没有unordered_map/unordered_set这两个容器，SGI3.0版本是C++11之前的版本，这两个容器是C++11之后才更新的。但是SGI3.0版本实现了哈希表，容器的名字是hash_map和hash_set，他是作为非标准的容器出现的，源代码在

hash_map/hash_set/stl_hash_map/stl_hash_set/stl_hashtable.h等几个头文件中，下面是截取的一部分核心代码

// stl_hash_set.h
template <class Value, class HashFcn, class EqualKey, class Alloc = alloc>
class hash_set
{
private:
  typedef hashtable<Value, Value, HashFcn, identity<Value>, 
                    EqualKey, Alloc> ht;
  ht rep;

public:
  typedef typename ht::key_type key_type;
  typedef typename ht::value_type value_type;
  typedef typename ht::hasher hasher;
  typedef typename ht::key_equal key_equal;

  typedef typename ht::size_type size_type;
  typedef typename ht::difference_type difference_type;
  typedef typename ht::const_pointer pointer;
  typedef typename ht::const_pointer const_pointer;
  typedef typename ht::const_reference reference;
  typedef typename ht::const_reference const_reference;

  typedef typename ht::const_iterator iterator;
  typedef typename ht::const_iterator const_iterator;

// stl_hash_map.h
template <class Key, class T, class HashFcn, class EqualKey, class Alloc = alloc>
class hash_map
{
private:
  typedef hashtable<pair<const Key, T>, Key, HashFcn,
                    select1st<pair<const Key, T>>, EqualKey, Alloc> ht;
  ht rep;

public:
  typedef typename ht::key_type key_type;
  typedef T data_type;
  typedef T mapped_type;
  typedef typename ht::value_type value_type;
  typedef typename ht::hasher hasher;
  typedef typename ht::key_equal key_equal;

  typedef typename ht::size_type size_type;
  typedef typename ht::difference_type difference_type;
  typedef typename ht::pointer pointer;
  typedef typename ht::const_pointer const_pointer;
  typedef typename ht::reference reference;
  typedef typename ht::const_reference const_reference;

  typedef typename ht::iterator iterator;
  typedef typename ht::const_iterator const_iterator;

// stl_hashtable.h
template <class Value, class Key, class HashFcn,
          class ExtractKey, class EqualKey,
          class Alloc>
class hashtable {
public:
  typedef Key key_type;
  typedef Value value_type;
  typedef HashFcn hasher;
  typedef EqualKey key_equal;

  typedef size_t            size_type;
  typedef ptrdiff_t         difference_type;
  typedef value_type*       pointer;
  typedef const value_type* const_pointer;
  typedef value_type&       reference;
  typedef const value_type& const_reference;


template <class Value>
struct __hashtable_node
{
  __hashtable_node* next;
  Value val;
};

通过上图分析可以看出，和set/map是一样的，对于hash_set而言，传给hashtable的第一个模版参数是Value，这个模版参数一路向下传，最终存在节点里面的就是Value，对于hash_map而言，传给hashtable的第一个模版参数是pair<const Key, T>，这个模版参数一路向下传，最终存在节点里面的就是pair<const Key, T>。那我们自己实现的时候，我们的思想的实现set/map时是一样的，hashtable的第二个模版参数才是真正存在节点里面的，第一个模版参数是为了find和erase的时候用。

所以对unordered_set/unordered_map以及哈希表的修改如下：

namespace hx
{
	template<class K>
	class unordered_set
	{
	public:
	private:
		hash_bucket::HashTable<K, K> _ht;
	};
}

namespace hx
{
	template<class K, class V>
	class unordered_map
	{
	public:
	private:
		hash_bucket::HashTable<K, pair<K, V>> _ht;
	};
}

namespace hash_bucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

	template<class K, class T>
	class HashTable
	{
		typedef HashNode<T> Node;
	public:
	private:
		vector<Node*> _table;
		size_t _n = 0;
	};
}

二、模拟实现unordered_set/unordered_map

模拟实现一共分为五步：

1、套上KeyOfT

2、普通迭代器

3、const迭代器

4、解决Key不能修改的问题

5、unordered_map的[]实现

2.1 套上KeyOfT

在这里的insert中，要进行取模操作，如果是key就没问题，如果是pair的话，因为pair的value是不参与计算的，所以要用一个仿函数先把key给取出来，而下层HashTable由于是泛型的原因，并不知道节点中存的是什么，但是上层知道，所以这个仿函数要从上层传进来。

namespace hx
{
	template<class K>
	class unordered_set
	{
        struct SetKeyOfT
        {
	        const K& operator()(const K& key)
	    {
		        return key;
	    }
    
	public:
        bool insert(const K& key)
        {
	        return _ht.insert(key);
        }
	private:
		hash_bucket::HashTable<K, K, SetKeyOfT> _ht;
	};
}

namespace hx
{
	template<class K, class V>
	class unordered_map
	{
        struct MapKeyOfT
        {
	        const K& operator()(const pair<K, V>& kv)
	    {
		        return kv.first;
	    }
    public:
        bool insert(const pair<K, V>& kv)
        {
	        return _ht.insert(kv);
        }
	private:
		hash_bucket::HashTable<K, K, MapKeyOfT> _ht;
	};
}

namespace hash_bucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

	template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
	class HashTable
	{
		typedef HashNode<T> Node;
	public:
		bool insert(const T& data)
		{
			KeyOfT kot;
            Hash hs;

			if (find(kot(data)))
				return false;

			// 扩容
			if (_n / _table.size() == 1)
			{
				vector<Node*> newtable(__stl_next_prime(_table.size() + 1), nullptr);
				for (int i = 0; i < _table.size(); i++)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;
						size_t hashi = hs(kot(cur->_data)) % newtable.size();
						cur->_next = newtable[hashi];
						newtable[hashi] = cur;
						cur = next;
					}
					_table[i] = nullptr;
				}
				_table.swap(newtable);
			}

			size_t hashi = hs(kot(data)) % _table.size();
			Node* newnode = new Node(data);

			newnode->_next = _table[hashi];
			_table[hashi] = newnode;
			++_n;

			return true;
		}
	};
}

这里套了两层仿函数，kot先把key取出来，hs再把key映射成整形

注：__stl_next_prime是一个有关于扩容的函数，在上一篇哈希表中有讲到，在这里为了省略一点篇幅就没有加上，在最后源码部分会加。

2.2 普通迭代器

iterator实现的大体框架跟list的iterator思路是一致的，用一个类型封装结点的指针，再通过重载运算符实现，迭代器像指针一样访问的行为，要注意的是哈希表的迭代器是单向迭代器。

这里的难点是operator++的实现。iterator中有一个指向结点的指针，如果当前桶下面还有结点， 则结点的指针指向下一个结点即可。如果当前桶走完了，则需要想办法计算找到下一个桶。但是迭代器里面只有一个节点的指针，那怎么才能找到下一个桶呢？实际上这里只有一个节点指针是不够的。还需要有一个哈希表指针，用key值计算出当前桶位置，依次往后找下一个不为空的桶即可。

begin()返回第一个不为空的桶中第一个节点指针构造的迭代器。

总结一下，如果下一个结点不为空，就走到下一个结点，如果下一个结点为空，就找下一个不为空的桶，如果把表都找完了，还没找到下一个不为空的桶，那就用空指针去构造迭代器就可以，end()的结束我们定义成走到空就是结束。

it在24，下一个要访问的结点是13

it在13，下一个要访问的结点是96，需要先计算出当前在几号桶，再往后找第一个不为空的桶的第一个结点。

template<class K, class T, class KeyOfT, class Hash>
struct __HashTable_Iterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef __HashTable_Iterator<K, T, KeyOfT, Hash> Self;
	Node* _node;
	HT* _ht;

	__HashTable_Iterator(Node* node, HT* ht)
		:_node(node)
		, _ht(ht)
	{}

	Self& operator++()
	{
        // 当前桶没走完
		if (_node->_next)
		{
			_node = _node->_next;
		}
        // 当前桶走完了
		else
		{
			KeyOfT kot;
			Hash hs;
            
            // 计算当前是几号桶
			size_t hashi = hs(kot(_node->_data)) % _ht->_table.size();
            // 从下一个桶开始
			hashi++;

			while (hashi < _ht->_table.size())
			{
				if (_ht->_table[hashi])
				{
					_node = _ht->_table[hashi];
					break;
				}
				else
				{
					hashi++;
				}
			}

			if (hashi == _ht->_table.size())
			{
				_node = nullptr;
			}
		}

		return *this;
	}
};

而又因为哈希表的迭代器是单向迭代器，所以没有--操作，我们把剩余的功能补全即可。

template<class K, class T, class KeyOfT, class Hash>
struct __HashTable_Iterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef __HashTable_Iterator<K, T, KeyOfT, Hash> Self;
	Node* _node;
	HT* _ht;

	__HashTable_Iterator(Node* node, HT* ht)
		:_node(node)
		, _ht(ht)
	{}

	T& operator*()
	{
		return _node->_data;
	}

	T* operator->()
	{
		return &_node->_data;
	}

	Self& operator++()
	{
        // 当前桶没走完
		if (_node->_next)
		{
			_node = _node->_next;
		}
        // 当前桶走完了
		else
		{
			KeyOfT kot;
			Hash hs;
            
            // 计算当前是几号桶    
			size_t hashi = hs(kot(_node->_data)) % _ht->_table.size();
            // 从下一个桶开始
			hashi++;
        
			while (hashi < _ht->_table.size())
			{
				if (_ht->_table[hashi])
				{
					_node = _ht->_table[hashi];
					break;
				}
				else
				{
					hashi++;
				}
			}

			if (hashi == _ht->_table.size())
			{
				_node = nullptr;
			}
		}

		return *this;
	}

	Self operator++(int)
	{
		Self tmp = *this;
		++*this;

		return tmp;
	}

	bool operator!=(const Self& s)
	{
		return _node != s._node;
	}

	bool operator==(const Self& s)
	{
		return _node == s._node;
	}
};

template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
class HashTable
{
	typedef HashNode<T> Node;
public:
	typedef __HashTable_Iterator<K, T, KeyOfT, Hash> iterator;

现在当大家测试代码的时候会跑不通，这个测试代码不是写完了直接编译，因为编译器会按需编译，如果没有用到迭代器就不会去编译迭代器的部分，所以我们要通过迭代器访问数据来测试。这时大家会发现是编译报错的，编译报错的原因是我们在__HashTable_Iterator这个类中用到了HashTable这个类，但是又因为编译器查找的规则是向上查找，向上找不到HashTable，就报错了，那我们把HashTable类放在上面可以吗？答案是也不行，因为HashTable中又要用到__HashTable_Iterator，这两个类是互相依赖的关系，不管谁放到上面，另一个都会找不到。所以我们需要前置声明一下，告诉编译器这是一个类。

而解决了上面这个问题以外，我们再运行还是会编译报错，这是因为在++中我们计算桶的位置时，用_ht访问了_table，但是_table是HashTable中的私有成员，类外不能访问私有，所以用一个GetTable，也可以用友元，__HashTable_Iterator类要访问HashTable类的私有，那__HashTable_Iterator类就是HashTable的友元。

// 前置声明
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

template<class K, class T, class KeyOfT, class Hash>
struct __HashTable_Iterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef __HashTable_Iterator<K, T, KeyOfT, Hash> Self;
	Node* _node;
	HT* _ht;

	__HashTable_Iterator(Node* node, HT* ht)
		:_node(node)
		, _ht(ht)
	{}

	T& operator*()
	{
		return _node->_data;
	}

	T* operator->()
	{
		return &_node->_data;
	}

	Self& operator++()
	{
        // 当前桶没走完
		if (_node->_next)
		{
			_node = _node->_next;
		}
        // 当前桶走完了
		else
		{
			KeyOfT kot;
			Hash hs;
            
            // 计算当前是几号桶
			size_t hashi = hs(kot(_node->_data)) % _ht->_table.size();
            // 从下一个桶开始
			hashi++;

			while (hashi < _ht->_table.size())
			{
				if (_ht->_table[hashi])
				{
					_node = _ht->_table[hashi];
					break;
				}
				else
				{
					hashi++;
				}
			}

			if (hashi == _ht->_table.size())
			{
				_node = nullptr;
			}
		}

		return *this;
	}

	Self operator++(int)
	{
		Self tmp = *this;
		++*this;

		return tmp;
	}

	bool operator!=(const Self& s)
	{
		return _node != s._node;
	}

	bool operator==(const Self& s)
	{
		return _node == s._node;
	}
};

template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
class HashTable
{
	// 友元
	template<class K, class T, class KeyOfT, class Hash>
	friend struct __HashTable_Iterator;

	typedef HashNode<T> Node;
public:
	typedef __HashTable_Iterator<K, T, KeyOfT, Hash> iterator;
};

实现好了内部细节，接下来来实现begin和end以及给上层套一层壳

template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
class HashTable
{
	// 友元
	template<class K, class T, class KeyOfT, class Hash>
	friend struct __HashTable_Iterator;

	typedef HashNode<T> Node;
public:
	typedef __HashTable_Iterator<K, T, KeyOfT, Hash> iterator;

	iterator begin()
	{
		for (int i = 0; i < _table.size(); i++)
		{
			if (_table[i])
			{
				return iterator(_table[i], this);
			}
		}

		return iterator(nullptr, this);
	}

	iterator end()
	{
		return iterator(nullptr, this);
	}
};

namespace hx
{
	template<class K>
	class unordered_set
	{
        //...
	public:
		typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::iterator iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

	private:
		hash_bucket::HashTable<K, K, SetKeyOfT> _ht;
	};
}

namespace hx
{
	template<class K, class V>
	class unordered_map
	{
        //...
	public:
		typedef typename hash_bucket::HashTable<K, pair<K, V>, MapKeyOfT>::iterator iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

	private:
		hash_bucket::HashTable<K, pair<K, V>, MapKeyOfT> _ht;
	};
}

2.3 const迭代器

const迭代器和普通迭代器只有operator*和operator->的返回值不一样，所以我们再加上两个模版参数来控制即可。

template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
struct __HashTable_Iterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef __HashTable_Iterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;
	Node* _node;
	HT* _ht;

	__HashTable_Iterator(Node* node, HT* ht)
		:_node(node)
		, _ht(ht)
	{}

	Ref operator*()
	{
		return _node->_data;
	}

	Ptr operator->()
	{
		return &_node->_data;
	}

	Self& operator++()
	{
		//...
	}

	Self operator++(int)
	{
        //...
	}

	bool operator!=(const Self& s)
	{
		//...
	}

	bool operator==(const Self& s)
	{
		//...
	}
};

template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
class HashTable
{
	// 友元
	template<class K, class T, class Pef, class Ptr, class KeyOfT, class Hash>
	friend struct __HashTable_Iterator;

	typedef HashNode<T> Node;
public:
	typedef __HashTable_Iterator<K, T, T&, T*, KeyOfT, Hash> iterator;
	typedef __HashTable_Iterator<K, T, const T&, const T*, KeyOfT, Hash> const_iterator;

    iterator begin()
    {
	    for (int i = 0; i < _table.size(); i++)
	    {
		    if (_table[i])
		    {
			    return iterator(_table[i], this);
		    }
	    }

	    return iterator(nullptr, this);
    }

    iterator end()
    {
    	return iterator(nullptr, this);
    }

    const_iterator begin() const
    {
	    for (int i = 0; i < _table.size(); i++)
	    {
		    if (_table[i])
		    {
			    return const_iterator(_table[i], this);
		    }
	    }

	    return const_iterator(nullptr, this);
    }

    const_iterator end() const
    {
	    return const_iterator(nullptr, this);
    }
};

namespace hx
{
	template<class K>
	class unordered_set
	{
        //...
	public:
		typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::iterator iterator;
        typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::const_iterator const_iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

        const_iterator begin() const
		{
			return _ht.begin();
		}

		const_iterator end() const
		{
			return _ht.end();
		}

	private:
		hash_bucket::HashTable<K, K, SetKeyOfT> _ht;
	};
}

namespace hx
{
	template<class K, class V>
	class unordered_map
	{
        //...
	public:
		typedef typename hash_bucket::HashTable<K, pair<K, V>, MapKeyOfT>::iterator iterator;
		typedef typename hash_bucket::HashTable<K, pair<K, V>, MapKeyOfT>::const_iterator const_iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

        const_iterator begin() const
		{
			return _ht.begin();
		}

		const_iterator end() const
		{
			return _ht.end();
		}


	private:
		hash_bucket::HashTable<K, pair<K, V>, MapKeyOfT> _ht;
	};
}

当我们测试的时候，又会出编译报错

这个地方提示构造函数调不到，这是为什么呢？我们来分析一下，this指针是类类型对象的指针，这个end是个const成员函数，const修饰的是this，this的类型是const HashTable* const this，而在迭代器中，_ht是没有const修饰的，所以这是一个权限放大，传不上去，我们在迭代器中把_ht设计成const的就可以了。

template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
struct __HashTable_Iterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef __HashTable_Iterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;
	Node* _node;
	const HT* _ht;

	__HashTable_Iterator(Node* node, const HT* ht)
		:_node(node)
		, _ht(ht)
	{}
};

2.4 解决Key不能修改的问题

就在要在结点中存的参数前加上const修饰，注意：unordered_map中不要加在pair前，因为value是可以修改的，所以加在Key前就可以。

namespace hx
{
	template<class K>
	class unordered_set
	public:
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT>::iterator iterator;
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT>::const_iterator const_iterator;

	private:
		hash_bucket::HashTable<K, const K, SetKeyOfT> _ht;
	};
}

namespace hx
{
	template<class K, class V>
	class unordered_map
	{
	public:
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT>::iterator iterator;
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT>::const_iterator const_iterator;

	private:
		hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT> _ht;
	};
}

2.5 unordered_map的[]实现

要实现[]首先要把insert的返回值改成pair<iterator, bool>，而insert里又要先调用find，所以我们先把find的返回值改成迭代器

iterator find(const K& key)
{
	KeyOfT kot;
	Hash hs;

	size_t hashi = hs(key) % _table.size();
	Node* cur = _table[hashi];
	while (cur)
	{
		if (kot(cur->_data) == key)
		{
			return iterator(cur, this);
		}

		cur = cur->_next;
	}

	return iterator(nullptr, this);
}

pair<iterator, bool> insert(const T& data)
{
	KeyOfT kot;
	Hash hs;

	iterator it = find(kot(data));
	if (it._node)
		return { it, false };


	// 扩容
	if (_n / _table.size() == 1)
	{
		//...
	}

	size_t hashi = hs(kot(data)) % _table.size();
	Node* newnode = new Node(data);

	newnode->_next = _table[hashi];
	_table[hashi] = newnode;
	++_n;

	return{ iterator(newnode, this), true };
}

V& operator[](const K& key)
{
	pair<iterator, bool> ret = insert({ key, V() });
	return ret.first->second;
}

[]的实现是先去调用insert，插入一个value的匿名对象就可以，会去调用value的默认构造，并且接收返回值，ret.first拿到的就是迭代器对象，里面存储的就是节点的指针，不管这个key是之前就存在，还是新插入进去的，都是这个节点的指针，迭代器对象去调用operator->拿到pair的地址，再解引用拿second，为了可读性两个->省略成了一个。

注：那到了这里我们的模拟实现可以说是已经没有什么问题，都可以跑通了，但是还有一个问题，就是HashFunc这个缺省参数，对于使用的人来说，并不知道底层哈希表的结构，如果我想自己控制的时候，没有办法传进去，所以我们需要在上层再增加一个模版参数，在上层传进来，把缺省参数的位置向外移了一层。其次就是上层的find和erase，直接套层壳就好了，这两点我们在这里不单独看代码了，在最后的源码中会写。

三、unordered_set/unordered_map以及哈希表源码

3.1 HashTable.h

#include<vector>

static const int __stl_num_primes = 28;
static const unsigned long __stl_prime_list[__stl_num_primes] =
{
  53,         97,         193,       389,       769,
  1543,       3079,       6151,      12289,     24593,
  49157,      98317,      196613,    393241,    786433,
  1572869,    3145739,    6291469,   12582917,  25165843,
  50331653,   100663319,  201326611, 402653189, 805306457,
  1610612741, 3221225473, 4294967291
};

inline unsigned long __stl_next_prime(unsigned long n)
{
	const unsigned long* first = __stl_prime_list;
	const unsigned long* last = __stl_prime_list + __stl_num_primes;
	const unsigned long* pos = lower_bound(first, last, n);
	return pos == last ? *(last - 1) : *pos;
}

template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

// 特化
template<>
struct HashFunc<string>
{
	size_t operator()(const string& str)
	{
		size_t sum = 0;
		for (auto ch : str)
		{
			sum *= 131;
			sum += ch;
		}

		return sum;
	}
};

namespace hash_bucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

	// 前置声明
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	struct __HashTable_Iterator
	{
		typedef HashNode<T> Node;
		typedef HashTable<K, T, KeyOfT, Hash> HT;
		typedef __HashTable_Iterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;
		Node* _node;
		const HT* _ht;

		__HashTable_Iterator(Node* node, const HT* ht)
			:_node(node)
			, _ht(ht)
		{}

		Ref operator*()
		{
			return _node->_data;
		}

		Ptr operator->()
		{
			return &_node->_data;
		}

		Self& operator++()
		{
			// 当前桶没走完
			if (_node->_next)
			{
				_node = _node->_next;
			}
			// 当前桶走完了
			else
			{
				KeyOfT kot;
				Hash hs;

				// 计算当前是几号桶
				size_t hashi = hs(kot(_node->_data)) % _ht->_table.size();
				// 从下一个桶开始
				hashi++;

				while (hashi < _ht->_table.size())
				{
					if (_ht->_table[hashi])
					{
						_node = _ht->_table[hashi];
						break;
					}
					else
					{
						hashi++;
					}
				}

				if (hashi == _ht->_table.size())
				{
					_node = nullptr;
				}
			}

			return *this;
		}

		Self operator++(int)
		{
			Self tmp = *this;
			++*this;

			return tmp;
		}

		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}

		bool operator==(const Self& s)
		{
			return _node == s._node;
		}
	};

	template<class K, class T, class KeyOfT, class Hash>
	class HashTable
	{
		// 友元
		template<class K, class T, class Pef, class Ptr, class KeyOfT, class Hash>
		friend struct __HashTable_Iterator;

		typedef HashNode<T> Node;
	public:
		typedef __HashTable_Iterator<K, T, T&, T*, KeyOfT, Hash> iterator;
		typedef __HashTable_Iterator<K, T, const T&, const T*, KeyOfT, Hash> const_iterator;

		iterator begin()
		{
			for (int i = 0; i < _table.size(); i++)
			{
				if (_table[i])
				{
					return iterator(_table[i], this);
				}
			}

			return iterator(nullptr, this);
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}

		const_iterator begin() const
		{
			for (int i = 0; i < _table.size(); i++)
			{
				if (_table[i])
				{
					return const_iterator(_table[i], this);
				}
			}

			return const_iterator(nullptr, this);
		}

		const_iterator end() const
		{
			return const_iterator(nullptr, this);
		}

		HashTable(size_t size = __stl_next_prime(0))
		{
			_table.resize(size, nullptr);
		}
        
        // 析构
		~HashTable()
		{
			for (int i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
		}
        
        // 拷贝构造，调用插入，一个一个插入就可以
		HashTable(const HashTable<K, T, KeyOfT, Hash>& ht)
		{
			_table.resize(ht._table.size(), nullptr);
			for (int i = 0; i < ht._table.size(); i++)
			{
				Node* cur = ht._table[i];
				while (cur)
				{
					insert(cur->_kv);
					cur = cur->_next;
				}
			}
		}

		// hb2 = hb1
        // 赋值，现代写法，调用拷贝构造神拷贝
		HashTable<K, T, KeyOfT, Hash>& operator=(HashTable<K, T, KeyOfT, Hash> ht)
		{
			swap(_table, ht._table);
			swap(_n, ht._n);

			return *this;
		}

		pair<iterator, bool> insert(const T& data)
		{
			KeyOfT kot;
			Hash hs;

			iterator it = find(kot(data));
			if (it._node)
				return { it, false };


			// 扩容
			if (_n / _table.size() == 1)
			{
				vector<Node*> newtable(__stl_next_prime(_table.size() + 1), nullptr);
				for (int i = 0; i < _table.size(); i++)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;
						size_t hashi = hs(kot(cur->_data)) % newtable.size();
						cur->_next = newtable[hashi];
						newtable[hashi] = cur;
						cur = next;
					}
					_table[i] = nullptr;
				}
				_table.swap(newtable);
			}

			size_t hashi = hs(kot(data)) % _table.size();
			Node* newnode = new Node(data);

			newnode->_next = _table[hashi];
			_table[hashi] = newnode;
			++_n;

			return{ iterator(newnode, this), true };
		}

		iterator find(const K& key)
		{
			KeyOfT kot;
			Hash hs;

			size_t hashi = hs(key) % _table.size();
			Node* cur = _table[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return iterator(cur, this);
				}

				cur = cur->_next;
			}

			return iterator(nullptr, this);
		}

		bool erase(const K& key)
		{
			KeyOfT kot;
			Hash hs;

			size_t hashi = hs(key) % _table.size();
			Node* cur = _table[hashi];
			Node* prev = nullptr;
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr)
					{
						_table[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}

					--_n;
					delete cur;

					return true;
				}

				prev = cur;
				cur = cur->_next;
			}

			return false;
		}

	private:
		vector<Node*> _table;
		size_t _n = 0;
	};
}

3.2 unordered_set

namespace hx
{
	template<class K, class Hash = HashFunc<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};

	public:
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::iterator iterator;
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::const_iterator const_iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		const_iterator begin() const
		{
			return _ht.begin();
		}

		const_iterator end() const
		{
			return _ht.end();
		}

		pair<iterator, bool> insert(const K& key)
		{
			return _ht.insert(key);
		}

		iterator find(const K& key)
		{
			return _ht.find(key);
		}

		bool erase(const K& key)
		{
			return _ht.erase(key);
		}

	private:
		hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;
	};
}

3.3 unordered_map

namespace hx
{
	template<class K, class V, class Hash = HashFunc<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};

	public:
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::iterator iterator;
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::const_iterator const_iterator;

		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		const_iterator begin() const
		{
			return _ht.begin();
		}

		const_iterator end() const
		{
			return _ht.end();
		}

		pair<iterator, bool> insert(const pair<K, V>& kv)
		{
			return _ht.insert(kv);
		}

		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = insert({ key, V() });
			return ret.first->second;
		}

		iterator find(const K& key)
		{
			return _ht.find(key);
		}

		bool erase(const K& key)
		{
			return _ht.erase(key);
		}

	private:
		hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;
	};
}

总结

在封装哈希表的时候，大的思路和方法和封装set/map时是一样的，可能在迭代器这块哈希表还简单一些，但是其他的细节，比如前置声明、友元、权限放大这些细节，还是需要细心才能发现，而且在这里这个类型非常长，很多模版又会互相牵扯，所以报错就非常多，非常长，大家还是一定要能耐心思考，发现问题，那本篇文章到这里就结束了，如果大家觉得小编写的还不错，可以给一个三连表示支持，谢谢大家！！！

C++ 用哈希表封装unordered_set/unordered_map

前言

一、源码结构分析

二、模拟实现unordered_set/unordered_map

2.1 套上KeyOfT

2.2 普通迭代器

2.3 const迭代器

2.4 解决Key不能修改的问题

2.5 unordered_map的[]实现

三、unordered_set/unordered_map以及哈希表源码

3.1 HashTable.h

3.2 unordered_set

3.3 unordered_map

总结

网站公告

今日签到

热门文章

最新发布