C++实现 HashMap LinkedHashMap HashSet 源码 效率极高 图解 完全本人手写 带效率测试源码

发布于:2023-01-14 ⋅ 阅读:(222) ⋅ 点赞:(0)

        C++ HashMap 源码。源码提供了LinkedHashMap以及HashSet。(HashSet的使用很简单,且基于HashMap,因此下面不做介绍)

        本源码和std::unordered_map在读写性能、内存占用均非常接近,但能在高速查找的同时保证了容器内的键值对是有序的。

        本LinkedHashMap虽然和顶级的C++HashMap相比有许多差距,但代码只有单个.hpp文件,总共800多行,非常的轻便,但性能上却并不含糊,使用习惯上接近Java。我在VS以及QT中编译都能顺利通过,并且运行没碰到过bug。你可以在一些小项目中使用它,希望能帮到你。

        此代码本人纯手写,希望能和大家讨论,也希望多多支持。

        本HashMap的键为对象时,需要提供 public size_t hashCode() 以及 bool operator==(const T&)

        本HashMap全名为    【 wLinkedHashMap 】

wLinkedHashMap成员函数

1.往容器中添加 void put(const KEY&, const DATA&);

    wLinkedHashMap<size_t, size_t> map;
    map.put(12, 12);

2.判断容器中是否包含 bool contain(const KEY&);

    wLinkedHashMap<size_t, size_t> map;
    map.put(12, 12);
    bool contain = map.contain(12);

3.移除对应元素 void remove(const KEY&);

    wLinkedHashMap<size_t, size_t> map;
    map.put(12, 12);
    map.remove(12);

4.清空容器 size_t clear();

    wLinkedHashMap<size_t, size_t> map;
    map.put(12, 12);
    map.clear();

5.容器存储元素个数 size_t size();

    wLinkedHashMap<size_t, size_t> map;
    map.put(12, 12);
    map.size();

6.获得对应键的迭代器 wLinkedHashMap::Iterator getNode(const KEY&);

    wLinkedHashMap<size_t, size_t> map;
    map.put(12, 12);
    auto iter = map.getNode(12);
    size_t key   = iter.key  ();    //读取键
    size_t value = iter.value();    //读取值

7.获得容器首个键值对的迭代器 wLinkedHashMap::Iterator firstNode(const KEY&);    用于正向迭代。

    wLinkedHashMap<size_t, size_t> map;
    map.put(12, 12);
    //正向迭代 迭代器的使用方法
    for (auto iter = map.firstNode(); iter.notNull(); iter.next())
    {
        size_t key   = iter.key  ();    //读取键
        size_t value = iter.value();    //读取值
    }

8.获得容器末尾键值对的迭代器 wLinkedHashMap::Iterator lastNode(const KEY&);    用于反向迭代。

    wLinkedHashMap<size_t, size_t> map;
    map.put(12, 12);
    //反向迭代 迭代器的使用方法
    for (auto iter = map.lastNode(); iter.notNull(); iter.prev())
    {
        size_t key   = iter.key  ();    //读取键
        size_t value = iter.value();    //读取值
    }


 

 

关于本HashMap的大致思路(欢迎讨论)
 

        本HashMap的数据是以树状结构存储的。

efa2433b3b1347b6816ef185090059d8.png

数据结构示意

 

        树的每个节点/页(Page)都是4比特(长度16),分支中的每个存储位可以是不同类型。

        如下代码中可看到,每个节点/页(Page)可以为数据(KeyvalueNode)、下一个节点/页(Page)、下一个长跳节点(JumpNode),如果共用了键也可以为数据集合(KeyvalueNodeVector)

		union uData
		{
			Page* page;
			JumpNode* jumpNode;
			KeyvalueNode* keyvalue;
            KeyvalueNodeVector* multiKeyvalue;
		}*datas;

         查找键时,从HashCode的头开始,每次取出4比特作为每页的序号,查找当前页的当前序号中的数据,如果为数据,那么取出;如果是节点/页,那么到这个节点/页中继续往下查找。

 

 



 

 

下面有源码以及测试代码

源码

#ifndef __wHashMap__
#define __wHashMap__

typedef size_t WHMHash;

struct wHashMap_HashCode
{
	template<class T>
	size_t operator()(const T& v) { return v.hashCode(); }
	size_t operator()(double  v) { if (sizeof(size_t) == sizeof(v)) return *(size_t*)&v; else return *(unsigned __int32*)&v; }
	size_t operator()(float   v) { if (sizeof(size_t) == sizeof(v)) return *(size_t*)&v; else return *(unsigned __int32*)&v; }
	size_t operator()(         __int64 v) { return v; }
	size_t operator()(         __int32 v) { return v; }
	size_t operator()(         __int16 v) { return v; }
	size_t operator()(         __int8  v) { return v; }
	size_t operator()(unsigned __int64 v) { return v; }
	size_t operator()(unsigned __int32 v) { return v; }
	size_t operator()(unsigned __int16 v) { return v; }
	size_t operator()(unsigned __int8  v) { return v; }
	size_t operator()(         long    v) { return v; }
	size_t operator()(unsigned long    v) { return v; }
	size_t operator()(const char* str) { size_t hash = 0; while (size_t ch = (size_t)*str++) hash = hash * 131 + ch; return hash; }
	size_t operator()(const void* v) { return *(size_t*)v; }
};
struct wHashMap_Equal
{
	template<class T>
	bool operator()(const T& l, const T& r) { return l == r; }
	bool operator()(const char* l, const char* r) { return !strcmp(l, r); }
};


template<class WHMKey, class WHMValue, class HashFunction = wHashMap_HashCode, class EqualFunction = wHashMap_Equal>
class wLinkedHashMap
{
private:
	class KeyvalueNode
	{
		friend class wLinkedHashMap;
		friend class Iterator;
	public:
		WHMKey key;
		WHMValue value;
		~KeyvalueNode() { }
		KeyvalueNode(const WHMKey& key, WHMValue&& value) : value(std::move(value)), key(key), _next(0), _prev(0) { }
	protected:
		KeyvalueNode* _next, * _prev;
		void breakLink()
		{
			if (_prev) _prev->_next = _next;
			if (_next) _next->_prev = _prev;
			_next = 0;
			_prev = 0;
		}
		void linkToNext(KeyvalueNode* linkThisToNext)
		{
			if (_next) _next->_prev = linkThisToNext;
			linkThisToNext->_next = _next;
			linkThisToNext->_prev = this;
			_next = linkThisToNext;
		}
		void linkToPrev(KeyvalueNode* linkThisToPrev)
		{
			if (_prev) _prev->_next = linkThisToPrev;
			linkThisToPrev->_prev = _prev;
			linkThisToPrev->_next = this;
			_prev = linkThisToPrev;
		}
	};
public:
	class Iterator
	{
	public:
		Iterator(KeyvalueNode* node) : node(node) {}
		Iterator& next() { node = node->_next; return *this; }
		Iterator& prev() { node = node->_prev; return *this; }
		bool hasNext() { return node->_next != 0; }
		bool hasPrev() { return node->_prev != 0; }
		const WHMKey  & key  () const { return node->key  ; }
		const WHMValue& value() const { return node->value; }
		      WHMValue& value()       { return node->value; }
		bool isNull () const { return node == 0; }
		bool notNull() const { return node != 0; }
	private:
		KeyvalueNode* node;
	};
public:
	wLinkedHashMap() : keyvalueCount(0), headKeyvalue(0), backKeyvalue(0), TopPage(new Page), lastAccess(0) { }
	wLinkedHashMap(wLinkedHashMap&& v) : keyvalueCount(0), headKeyvalue(0), backKeyvalue(0), TopPage(new Page), lastAccess(0) { swap(v); }
	void swap(wLinkedHashMap& v)
	{
		int           count   = this->keyvalueCount; this->keyvalueCount = v.keyvalueCount; v.keyvalueCount = count  ;
		Page*         topPage = this->TopPage      ; this->TopPage       = v.TopPage      ; v.TopPage       = topPage;
		KeyvalueNode* head    = this->headKeyvalue ; this->headKeyvalue  = v.headKeyvalue ; v.headKeyvalue  = head   ;
		KeyvalueNode* back    = this->backKeyvalue ; this->backKeyvalue  = v.backKeyvalue ; v.backKeyvalue  = back   ;
	}
	wLinkedHashMap(const wLinkedHashMap& v) = delete;
	~wLinkedHashMap() { delete TopPage; }
private:
	static const size_t __uIntSize = sizeof(WHMHash) * 8;			//64CPU:32	32CPU:32	----
	static const size_t __biteSize = 4;								//4						----每页4字节长度的数据量
	static const size_t __pageSize = 1 << __biteSize;				//16					----每页长度
	static const size_t __pageLayerCount = __uIntSize / __biteSize;	//64CPU:8	32CPU:8		----层深
	static const bool tPositive_fNegative = false;

	//inline static WHMHash getHash(const WHMKey& inputKey) { return inputKey.hashCode(); }
	inline static WHMHash getHashCode(const WHMKey& key) { return HashFunction().operator()(key); }
	inline static bool Equal(const WHMKey& l, const WHMKey& r) { return EqualFunction().operator()(l, r); }

	class Page;
	class JumpNode
	{
	public:
		JumpNode() { memset(this, 0, sizeof(JumpNode)); }
		JumpNode(Page* page, WHMHash joinHashCode) : page(page) { pageHashCode = joinHashCode & getMask(page->layer); }
		~JumpNode() { if (page) delete page; }
		Page* page;
		WHMHash pageHashCode;
		bool canbeJoin(WHMHash joinHashCode) { return (getMask(page->layer) & joinHashCode) == pageHashCode; }
		inline WHMHash getMask(unsigned __int8 layer)
		{
			//顺序读取,先读高位,再读低位
			if (tPositive_fNegative)
				return ~((~(WHMHash)0) >> (page->layer * 4));
			//逆序读取,先读低位,再读高位
			else
				return ~((~(WHMHash)0) << (page->layer * 4));
		}
	};
	class KeyvalueList
	{
	public:
		KeyvalueList(KeyvalueNode* keyvalue) : keyvalue(keyvalue), next(0) {}
		KeyvalueNode* keyvalue;
		KeyvalueList* next;
	};
	class KeyvalueNodeVector
	{
	public:
        KeyvalueNodeVector(KeyvalueNode* v0, KeyvalueNode* v1) : size(2), datas(new KeyvalueNode*[2]) { datas[0] = v0; datas[1] = v1; }
        ~KeyvalueNodeVector() { delete[] datas; }
		size_t size;
		KeyvalueNode** datas;
		void push(KeyvalueNode* v)
		{
			KeyvalueNode** newdatas = new KeyvalueNode*[size + 1];
			memcpy(newdatas, datas, sizeof(KeyvalueNode*) * size);
            newdatas[size++] = v;
            delete datas;
            datas = newdatas;
		}
        void notSave(size_t index)
        {
            KeyvalueNode** newdatas = new KeyvalueNode*[size - 1];
            memcpy(newdatas, datas, sizeof(KeyvalueNode*) * index);
            memcpy(&newdatas[index], &datas[index + 1], sizeof(KeyvalueNode*) * (size - index - 1));
            delete datas;
            datas = newdatas;
        }
	};

	class Page
	{
	public:
		static const unsigned __int8 __offData = 1 << 4;    //0b 0001 0000
		static const unsigned __int8 __offPage = 1 << 5;    //0b 0010 0000
		static const unsigned __int8 __offJump = 1 << 6;    //0b 0100 0000
		static const unsigned __int8 __offList = 1 << 7;    //0b 1000 0000
		static const unsigned __int8 __firstSize = 2;
		static const unsigned __int8 __maxSpareSize = 4;

		inline static bool isDataIndex(unsigned __int8 index) { return index & __offData; }
		inline static bool isListIndex(unsigned __int8 index) { return index & __offList; }
		inline static bool isPageIndex(unsigned __int8 index) { return index & __offPage; }
		inline static bool isJumpIndex(unsigned __int8 index) { return index & __offJump; }
		inline static unsigned __int8 toListIndex(unsigned __int8 index) { return index & 0xF | __offData | __offList; }
		inline static unsigned __int8 toDataIndex(unsigned __int8 index) { return index & 0xF | __offData; }
		inline static unsigned __int8 toPageIndex(unsigned __int8 index) { return index & 0xF | __offPage; }
		inline static unsigned __int8 toJumpIndex(unsigned __int8 index) { return index & 0xF | __offJump; }
		inline static unsigned __int8 getRealIndex(unsigned __int8 index) { return index & 0xF; }
		//将包含了index和包含了标识符的数据整合
		inline static unsigned __int8 blendIndexFlag(unsigned __int8 includeIndex, unsigned __int8 includeFlag) { return (includeIndex & 0xF) | (includeFlag & 0xF0); }


		union uData
		{
			Page* page;
			JumpNode* jumpNode;
			KeyvalueNode* keyvalue;
            KeyvalueNodeVector* multiKeyvalue;
		}*datas;

		Page() { memset(this, 0, sizeof(Page)); constrain = __firstSize; datas = new uData[__firstSize]; }
		~Page()
		{
			for (unsigned __int8 i = 0; i < top; i++)
			{
				unsigned __int8 physicalIndex = indexes[i];
				if (physicalIndex)
				{
					if (isDataIndex(physicalIndex)) delete datas[getRealIndex(physicalIndex)].keyvalue;
					else if (isPageIndex(physicalIndex)) delete datas[getRealIndex(physicalIndex)].page;
					else if (isJumpIndex(physicalIndex)) delete datas[getRealIndex(physicalIndex)].jumpNode;
				}
			}
			delete[] datas;
		}
		//count:当页存储的数据量
		//constrain:如果是data==multiData时,记录为空间的长度。如果为oneData时,记录为0
		//top:顶部数据位置。例如data==multiData时,当页数据count=10,空间constrain=12,最后面一个数据在index=10的位置,那么top记录为11
		unsigned __int8 count, constrain, top, layer;
		unsigned __int8 indexes[__pageSize];

		inline unsigned __int8 firstHasDataIndex() { for (unsigned __int8 i = 0; i < __pageSize; i++) { if (indexes[i]) return i; } return -1; }
		//获得任意的比当前层更深(或者说更精确,更长)的hashcode
		WHMHash getRandomHashCode()
		{
			unsigned __int8 physicalIndexHasData = indexes[firstHasDataIndex()];
			if (isDataIndex(physicalIndexHasData))
			{
				if (isListIndex(physicalIndexHasData)) return getHashCode(datas[getRealIndex(physicalIndexHasData)].multiKeyvalue->datas[0]->key);
				else return getHashCode(datas[getRealIndex(physicalIndexHasData)].keyvalue->key);
			}
			if (isPageIndex(physicalIndexHasData)) return datas[getRealIndex(physicalIndexHasData)].page->getRandomHashCode();
			if (isJumpIndex(physicalIndexHasData)) return datas[getRealIndex(physicalIndexHasData)].jumpNode->pageHashCode;
		}

		//在多个数据且不和其他冲突的条件下,插入一个keyvalue。注意:函数不做冲突检测
		void insertKeyvalue(KeyvalueNode* v, unsigned __int8 layerIndex)
		{
			//如果容量充足
			if (constrain > count)
			{
				unsigned __int8 insertPhysicalIndex;
				if (top < constrain)
				{
					insertPhysicalIndex = top;
					top++;
				}
				else
				{
					insertPhysicalIndex = 0;
					while (true)
					{
						if (datas[insertPhysicalIndex].page == 0) break;
						if (++insertPhysicalIndex >= __pageSize) throw getException("wHashMap Error.", __LINE__);
					}
				}
				indexes[layerIndex] = toDataIndex(insertPhysicalIndex);
				datas[insertPhysicalIndex].keyvalue = v;
			}
			//如果容量不足,重新设置空间
			else
			{
				uData* newDatas = new uData[++constrain];
				memcpy(newDatas, datas, sizeof(uData) * count);
				newDatas[top].keyvalue = v;
				delete[] datas;
				datas = newDatas;
				indexes[layerIndex] = toDataIndex(top++);
			}
			count++;
		}
		//置0操作 。没有内存释放
		void easyRemove(unsigned __int8 layerIndex, unsigned __int8 physicalIndex)
		{
			datas[getRealIndex(physicalIndex)].keyvalue = 0;
			indexes[layerIndex] = 0;
			while (top > 0 && indexes[top - 1] == 0) top--;
			count--;
		}
		//强制紧缩空间
		void shrink()
		{
			uData* newdatas = new uData[count];
			constrain = count;
			top = count;
			unsigned __int8 p = 0, physicalIndex;
			for (unsigned __int8 i = 0; i < __pageSize; i++)
			{
				if (indexes[i])
				{
					newdatas[p] = datas[getRealIndex(indexes[i])];
					indexes[i] = blendIndexFlag(p, indexes[i]);
					p++;
				}
			}
			delete[] datas;
			datas = newdatas;
		}
		//如果下一个page数据量为1时调用将其删除
		void translatNextPage(unsigned __int8 layerIndex, Page* nextPage)
		{
			unsigned __int8 layerIndexInNext = nextPage->firstHasDataIndex();
			unsigned __int8 physicalIndexInNext = nextPage->indexes[layerIndexInNext];


			//将被移除层剩余数据为keyvalue
			if (isDataIndex(physicalIndexInNext))
			{
				if (isListIndex(physicalIndexInNext))
				{
					KeyvalueNodeVector* theList = nextPage->datas[getRealIndex(physicalIndexInNext)].multiKeyvalue;
					nextPage->easyRemove(layerIndexInNext, physicalIndexInNext);

					if (isPageIndex(indexes[layerIndex])) delete datas[getRealIndex(indexes[layerIndex])].page;
					else if (isJumpIndex(indexes[layerIndex])) delete datas[getRealIndex(indexes[layerIndex])].jumpNode;
					else throw getException("wHashMap Error.", __LINE__);

					indexes[layerIndex] = toListIndex(indexes[layerIndex]);
					datas[getRealIndex(indexes[layerIndex])].multiKeyvalue = theList;
				}
				else
				{
					KeyvalueNode* theData = nextPage->datas[getRealIndex(physicalIndexInNext)].keyvalue;
					nextPage->easyRemove(layerIndexInNext, physicalIndexInNext);

					if (isPageIndex(indexes[layerIndex])) delete datas[getRealIndex(indexes[layerIndex])].page;
					else if (isJumpIndex(indexes[layerIndex])) delete datas[getRealIndex(indexes[layerIndex])].jumpNode;
					else throw getException("wHashMap Error.", __LINE__);

					indexes[layerIndex] = toDataIndex(indexes[layerIndex]);
					datas[getRealIndex(indexes[layerIndex])].keyvalue = theData;
				}
			}
			//将被移除层剩余数据为Page,那么将变为非连续page
			else if (isPageIndex(physicalIndexInNext))
			{
				Page* thePage = nextPage->datas[getRealIndex(physicalIndexInNext)].page;
				nextPage->easyRemove(layerIndexInNext, physicalIndexInNext);

				if (isPageIndex(indexes[layerIndex])) delete datas[getRealIndex(indexes[layerIndex])].page;
				else if (isJumpIndex(indexes[layerIndex])) delete datas[getRealIndex(indexes[layerIndex])].jumpNode;
				else throw getException("wHashMap Error.", __LINE__);

				indexes[layerIndex] = toJumpIndex(indexes[layerIndex]);
				datas[getRealIndex(indexes[layerIndex])].jumpNode = new JumpNode(thePage, thePage->getRandomHashCode());
			}
			//将被移除层剩余数据为非连续Page,那么直接将这个非连续Page连接
			else
			{
				JumpNode* theJPage = nextPage->datas[getRealIndex(physicalIndexInNext)].jumpNode;
				nextPage->easyRemove(layerIndexInNext, physicalIndexInNext);

				if (isPageIndex(indexes[layerIndex])) delete datas[getRealIndex(indexes[layerIndex])].page;
				else if (isJumpIndex(indexes[layerIndex])) delete datas[getRealIndex(indexes[layerIndex])].jumpNode;
				else throw getException("wHashMap Error.", __LINE__);

				indexes[layerIndex] = toJumpIndex(indexes[layerIndex]);
				datas[getRealIndex(indexes[layerIndex])].jumpNode = theJPage;
			}
		}

		//将某个位置存储的数据替换为Page。注意:函数不做冲突或空间检测
		void replaceInMultiData(Page* v, unsigned __int8 layerIndex)
		{
			indexes[layerIndex] = toPageIndex(indexes[layerIndex]);
			datas[getRealIndex(indexes[layerIndex])].page = v;
		}
		//将某个位置存储的数据替换为JumpPage。注意:函数不做冲突或空间检测
		void replaceInMultiData(JumpNode* v, unsigned __int8 layerIndex)
		{
			indexes[layerIndex] = toJumpIndex(indexes[layerIndex]);
			datas[getRealIndex(indexes[layerIndex])].jumpNode = v;
		}

		void easySet(unsigned __int8 index, unsigned __int8 physicalIndex, KeyvalueNode* v)
		{
			indexes[index] = toDataIndex(physicalIndex);
			datas[physicalIndex].keyvalue = v;
		}
		void easySet(unsigned __int8 index, unsigned __int8 physicalIndex, KeyvalueNodeVector* v)
		{
			indexes[index] = toListIndex(physicalIndex);
			datas[physicalIndex].multiKeyvalue = v;
		}
		void easySet(unsigned __int8 index, unsigned __int8 physicalIndex, Page* v)
		{
			indexes[index] = toPageIndex(physicalIndex);
			datas[physicalIndex].page = v;
		}
		void easySet(unsigned __int8 index, unsigned __int8 physicalIndex, JumpNode* v)
		{
			indexes[index] = toJumpIndex(physicalIndex);
			datas[physicalIndex].jumpNode = v;
		}


		enum class IndexState { Empty, Data, Page, JumpPage };
		inline IndexState getIndexState(unsigned __int8 index)
		{
			unsigned __int8 physicalIndex = indexes[index];
			if (physicalIndex)
			{
				if (physicalIndex & __offPage) return IndexState::Page;
				if (physicalIndex & __offData) return IndexState::Data;
				if (physicalIndex & __offJump) return IndexState::JumpPage;
			}
			else return IndexState::Empty;
		}

	};

	Page* TopPage;

	size_t keyvalueCount;

	KeyvalueNode* headKeyvalue, * backKeyvalue;



	//输入hashcode,获得在某一层处的index
	inline static unsigned __int8 hashInLayerIndex(WHMHash hashCode, unsigned __int8 layer)
	{
		//顺序读取,先读高位,再读低位
		if (tPositive_fNegative)
		{
			return layer & 1 ?
				(((unsigned __int8*)&hashCode)[(__pageLayerCount - 1 - layer) >> 1]) & 0xF :
				(((unsigned __int8*)&hashCode)[(__pageLayerCount - 1 - layer) >> 1]) >> 4;
		}
		//逆序读取,先读低位,再读高位
		else
		{
			return layer & 1 ?
				(((unsigned __int8*)&hashCode)[layer >> 1]) >> 4 :
				(((unsigned __int8*)&hashCode)[layer >> 1]) & 0xF;
		}
	}

	inline KeyvalueNode* generateKeyvalue(const WHMKey& inputKey, WHMValue&& inputValue)
	{
		KeyvalueNode* res = new KeyvalueNode(inputKey, std::move(inputValue));
		if (backKeyvalue)
		{
			backKeyvalue->linkToNext(res);
			backKeyvalue = res;
		}
		else
		{
			headKeyvalue = res;
			backKeyvalue = res;
		}
		keyvalueCount++;
		lastAccess = res;
		return res;
	}

	//输入两个hashcode,获得他们可以分道扬镳的层
	inline static unsigned __int8 combinateInLayer(WHMHash hashA, WHMHash hashB)
	{
		for (unsigned __int8 i = 0; i < __pageLayerCount; i++)
		{
			if (hashInLayerIndex(hashA, i) != hashInLayerIndex(hashB, i))
				return i;
		}
	}

	void breakLink(KeyvalueNode* node)
	{
		keyvalueCount--;
		if (keyvalueCount)
		{
			if (headKeyvalue == node) headKeyvalue = node->_next;
			if (backKeyvalue == node) backKeyvalue = node->_prev;
		}
		else
		{
			headKeyvalue = 0;
			backKeyvalue = 0;
		}
		node->breakLink();
	}


	KeyvalueNode* getKeyvalue(const WHMKey& inputKey)
	{
		if (isLastAccess(inputKey)) return lastAccess;

		Page* currentPage = TopPage;
		WHMHash hashCode = getHashCode(inputKey);

		while (true)
		{
			unsigned __int8 layerIndex = hashInLayerIndex(hashCode, currentPage->layer);
			unsigned __int8 physicalIndex = currentPage->indexes[layerIndex];

			if (physicalIndex)
			{
				//已有的数据为Data
				if (Page::isDataIndex(physicalIndex))
				{
					if (Page::isListIndex(physicalIndex))
					{
                        KeyvalueNodeVector* list = currentPage->datas[Page::getRealIndex(physicalIndex)].multiKeyvalue;
                        for (int i = 0; i < list->size; i++)
                        {
							KeyvalueNode* node = list->datas[i];
							if (Equal(node->key, inputKey))
							{
								lastAccess = node;
								return node;
							}
                        }
					}
					else
					{
                        KeyvalueNode* node = currentPage->datas[Page::getRealIndex(physicalIndex)].keyvalue;
                        if (Equal(node->key, inputKey))
                        {
                            lastAccess = node;
                            return node;
                        }
					}
					lastAccess = 0;
					return 0;
				}
				//已有的数据为Page,那么进入下次循环
				else if (Page::isPageIndex(physicalIndex))
				{
					currentPage = currentPage->datas[Page::getRealIndex(physicalIndex)].page;
					continue;
				}
				//已有的数据为非连续Page
				else if (Page::isJumpIndex(physicalIndex))
				{
					JumpNode* jPage = currentPage->datas[Page::getRealIndex(physicalIndex)].jumpNode;
					//可以加入下一个非连续Page,进入循环
					if (jPage->canbeJoin(hashCode))
					{
						currentPage = jPage->page;
						continue;
					}
					lastAccess = 0;
					return 0;
				}
				else throw getException("wHashMap Error.", __LINE__);
			}
			else
			{
				lastAccess = 0;
				return 0;
			}
		}
	}
	const KeyvalueNode* getKeyvalue(const WHMKey& inputKey) const { return ((wLinkedHashMap*)this)->getKeyvalue(inputKey); }

	KeyvalueNode* lastAccess;
	inline bool isLastAccess(const WHMKey& inputKey)
	{
		if (lastAccess && Equal(lastAccess->key, inputKey)) return true;
		return false;
	}

public:

	void clear() { delete TopPage; TopPage = new Page; }

	int size() const { return keyvalueCount; }

	Iterator firstNode() { return Iterator(headKeyvalue); }
	Iterator lastNode () { return Iterator(backKeyvalue); }
	Iterator getNode  (const WHMKey& inputKey) { return Iterator(getKeyvalue(inputKey)); }
	const Iterator firstNode() const { return Iterator(headKeyvalue); }
	const Iterator lastNode () const { return Iterator(backKeyvalue); }
	const Iterator getNode(const WHMKey& inputKey) const { return Iterator(((wLinkedHashMap*)this)->getKeyvalue(inputKey)); }

	bool contain(const WHMKey& inputKey) const { return 0 != getKeyvalue(inputKey); }

	WHMValue& get(const WHMKey& inputKey) { return getKeyvalue(inputKey)->value; }
	const WHMValue& get(const WHMKey& inputKey) const { return getKeyvalue(inputKey)->value; }

	WHMValue* Get(const WHMKey& inputKey) { KeyvalueNode* res = getKeyvalue(inputKey); return res ? &res->value : 0; }
	const WHMValue* Get(const WHMKey& inputKey) const { const KeyvalueNode* res = getKeyvalue(inputKey); return res ? &res->value : 0; }

	KeyvalueNode* pop(const WHMKey& inputKey)
	{
		lastAccess = 0;
		Page* lastPage = 0;
		unsigned __int8 lastLayerIndex;
		Page* currentPage = TopPage;
		WHMHash hashCode = getHashCode(inputKey);

		while (true)
		{
			unsigned __int8 layerIndex = hashInLayerIndex(hashCode, currentPage->layer);
			unsigned __int8 physicalIndex = currentPage->indexes[layerIndex];

			if (physicalIndex)
			{
				//已有的数据为Data
				if (Page::isDataIndex(physicalIndex))
				{
					if (Page::isListIndex(physicalIndex))
					{
						KeyvalueNodeVector* list = currentPage->datas[Page::getRealIndex(physicalIndex)].multiKeyvalue;
						if (getHashCode(list->datas[0]->key) == hashCode)
						{
							for (size_t i = 0; i < list->size; i++)
							{
								KeyvalueNode* node = list->datas[i];
								if (Equal(node->key, inputKey))
								{
									breakLink(node);
									if (list->size == 2)
                                    {
									    KeyvalueNode* remainNode = list->datas[i ? 0 : 1];
									    delete list;
									    currentPage->indexes[layerIndex] = Page::toDataIndex(physicalIndex);
									    currentPage->datas[Page::getRealIndex(physicalIndex)].keyvalue = remainNode;
                                    }
									else list->notSave(i);
									return node;
								}
							}
						}
					}
					else
					{
						KeyvalueNode* node = currentPage->datas[Page::getRealIndex(physicalIndex)].keyvalue;
						if (getHashCode(node->key) == hashCode && Equal(node->key, inputKey))
						{
                            breakLink(node);
                            currentPage->easyRemove(layerIndex, physicalIndex);
                            if (currentPage->count == 1 && lastPage) lastPage->translatNextPage(lastLayerIndex, currentPage);
                            else if (currentPage->count + Page::__maxSpareSize > currentPage->constrain) currentPage->shrink();
                            return node;
						}
					}
					return 0;
				}
				//已有的数据为Page,那么进入下次循环
				else if (Page::isPageIndex(physicalIndex))
				{
					lastPage = currentPage;
					lastLayerIndex = layerIndex;
					currentPage = currentPage->datas[Page::getRealIndex(physicalIndex)].page;
					continue;
				}
				//已有的数据为非连续Page
				else if (Page::isJumpIndex(physicalIndex))
				{
					JumpNode* jPage = currentPage->datas[Page::getRealIndex(physicalIndex)].jumpNode;
					//可以加入下一个非连续Page,进入循环
					if (jPage->canbeJoin(hashCode))
					{
						lastPage = currentPage;
						lastLayerIndex = layerIndex;
						currentPage = jPage->page;
						continue;
					}
					return 0;
				}
				else throw getException("wHashMap Error.", __LINE__);
			}
			else return 0;
		}
	}

	void remove(const WHMKey& inputKey) { KeyvalueNode* v = pop(inputKey); if (v) delete v; }

	void put(const WHMKey& inputKey, WHMValue&& inputValue, bool isReplaceIfHasData = true)
	{
		Page* currentPage = TopPage;
		WHMHash hashCode = getHashCode(inputKey);

		//wp("hashCode: 0x" + WConvert::toString(hashCode, 16) + ", 0b" + WConvert::toString(hashCode, 2));

		while (true)
		{
			unsigned __int8 layerIndex = hashInLayerIndex(hashCode, currentPage->layer);
			unsigned __int8 physicalIndex = currentPage->indexes[layerIndex];

			//wp("layer:" + WConvert::toString(currentPage->layer) + ", layerIndex:" + WConvert::toString(layerIndex) + ", physicalIndex:" + WConvert::toString(physicalIndex, 2));

			//需要插入的位置 有 数据  **EditOK**
			if (physicalIndex)
			{
				//已有的数据为Data  **EditOK**
				if (Page::isDataIndex(physicalIndex))
				{
					unsigned __int8 realPhysicalIndex = Page::getRealIndex(physicalIndex);
					typename Page::uData nodeOrMulti = currentPage->datas[Page::getRealIndex(physicalIndex)];
					WHMHash nodeHashCode = getHashCode(Page::isListIndex(physicalIndex) ? nodeOrMulti.multiKeyvalue->datas[0]->key : nodeOrMulti.keyvalue->key);
					//如果hashCode相同  **EditOK**
					if (nodeHashCode == hashCode)
					{
						if (Page::isListIndex(physicalIndex))
						{
							for (size_t i = 0; i < nodeOrMulti.multiKeyvalue->size; i++)
							{
								KeyvalueNode* node = nodeOrMulti.multiKeyvalue->datas[i];
								if (Equal(node->key, inputKey))
								{
									if (isReplaceIfHasData) node->value = std::move(inputValue);
									lastAccess = node;
									return;
								}
							}
							nodeOrMulti.multiKeyvalue->push(generateKeyvalue(inputKey, std::move(inputValue)));
							return;
						}
						else
						{
							if (Equal(nodeOrMulti.keyvalue->key, inputKey))
							{
								if (isReplaceIfHasData) nodeOrMulti.keyvalue->value = std::move(inputValue);
								lastAccess = nodeOrMulti.keyvalue;
								return;
							}
							KeyvalueNodeVector* multiKeyvalue = new KeyvalueNodeVector(nodeOrMulti.keyvalue, generateKeyvalue(inputKey, std::move(inputValue)));
							currentPage->indexes[layerIndex] = Page::toListIndex(physicalIndex);
							currentPage->datas[Page::getRealIndex(physicalIndex)].multiKeyvalue = multiKeyvalue;
							return;
						}
					}
					//如果keyhashcode不同,需要在其他页分道扬镳  **EditOK**
					else
					{
						Page* targetPage = new Page;
						targetPage->layer = combinateInLayer(nodeHashCode, hashCode);

						if (Page::isListIndex(physicalIndex))
							targetPage->easySet(hashInLayerIndex(nodeHashCode, targetPage->layer), 0, nodeOrMulti.multiKeyvalue);
						else
							targetPage->easySet(hashInLayerIndex(nodeHashCode, targetPage->layer), 0, nodeOrMulti.keyvalue);
						targetPage->easySet(hashInLayerIndex(hashCode, targetPage->layer), 1, generateKeyvalue(inputKey, std::move(inputValue)));
						targetPage->count = 2;
						targetPage->top = 2;

						if (targetPage->layer == currentPage->layer + 1)
						{
							currentPage->replaceInMultiData(targetPage, layerIndex);
						}
						else
						{
							JumpNode* targetJump = new JumpNode(targetPage, nodeHashCode);
							currentPage->replaceInMultiData(targetJump, layerIndex);
						}
						return;
					}
				}
				//已有的数据为Page,那么进入下次循环  **EditOK**
				else if (Page::isPageIndex(physicalIndex))
				{
					currentPage = currentPage->datas[Page::getRealIndex(physicalIndex)].page;
					continue;
				}
				//已有的数据为非连续Page  **EditOK**
				else if (Page::isJumpIndex(physicalIndex))
				{
					JumpNode* jPage = currentPage->datas[Page::getRealIndex(physicalIndex)].jumpNode;
					//可以加入下一个非连续Page,进入循环  **EditOK**
					if (jPage->canbeJoin(hashCode))
					{
						currentPage = jPage->page;
						continue;
					}
					//与此跳转页不同,需要在某个地方分道扬镳,或其他策略  **EditOK**
					else
					{
						Page* targetPage = new Page;
						//获得与跳转层能分道扬镳的层
						targetPage->layer = combinateInLayer(jPage->pageHashCode, hashCode);
						targetPage->easySet(hashInLayerIndex(hashCode, targetPage->layer), 0, generateKeyvalue(inputKey, std::move(inputValue)));
						//当插入的层与下一层刚好紧挨着,那么去掉跳转信息
						if (targetPage->layer + 1 == jPage->page->layer)
						{
							targetPage->easySet(hashInLayerIndex(jPage->pageHashCode, targetPage->layer), 1, jPage->page);

							jPage->page = 0;
							delete jPage;
						}
						//不相邻时,保留跳转层信息
						else
						{
							targetPage->easySet(hashInLayerIndex(jPage->pageHashCode, targetPage->layer), 1, jPage);
						}

						targetPage->count = 2;
						targetPage->top = 2;

						if (targetPage->layer == currentPage->layer + 1)
						{
							currentPage->replaceInMultiData(targetPage, layerIndex);
						}
						else
						{
							//wp("jumpKey:" + (int)inputKey.data);
							JumpNode* targetJump = new JumpNode(targetPage, hashCode);
							currentPage->replaceInMultiData(targetJump, layerIndex);
						}
						return;
					}
				}
				else throw getException("wHashMap Error.", __LINE__);
			}
			//需要插入的位置 没有 数据  **EditOK**
			else
			{
				currentPage->insertKeyvalue(generateKeyvalue(inputKey, std::move(inputValue)), layerIndex);
				return;
			}
		}
	}

	void put(const WHMKey& inputKey, const WHMValue& inputValue, bool isReplaceIfHasData = true) { put(inputKey, std::move(WHMValue(inputValue)), isReplaceIfHasData); }

	static void deleteKeyvalueNode(KeyvalueNode* v) { if (v) delete v; }

	static const char* getException(const char* formater, int line) { char* what = new char[32]; sprintf_s(what, 32, formater, line); return what; }
};

template<class T, class HashFunction = wHashMap_HashCode, class EqualFunction = wHashMap_Equal>
class wHashSet
{
public:
	class Iterator
	{
	public:
		Iterator(typename wLinkedHashMap<T, size_t, HashFunction, EqualFunction>::Iterator _node) : node(_node) { }
		bool hasNext() { return node.hasNext(); }
		bool hasPrev() { return node.hasPrev(); }
		Iterator& next() { node.next(); return *this; }
		Iterator& prev() { node.prev(); return *this; }
		const T& value() { return node.key(); }
		size_t inputTimes() { return node.value(); }
		bool isNull() { return node.isNull(); }
		bool notNull() { return node.notNull(); }
	private:
		typename wLinkedHashMap<T, size_t, HashFunction, EqualFunction>::Iterator node;
	};
	Iterator iterator() { return Iterator(datas.firstNode()); }
	const Iterator iterator() const { return Iterator(datas.firstNode()); }
	void put(const T& v) { if (datas.contain(v)) datas.get(v)++; else datas.put(v, 1); }
	void remove(const T& v) { datas.remove(v); }
	size_t inputTimes(const T& v) { size_t* times = datas.Get(v); return times ? *times : 0; }
	int size() const { return datas.size(); }
	void clear() { datas.clear(); }

private:
	wLinkedHashMap<T, size_t, HashFunction, EqualFunction> datas;
};


#endif // !__wHashMap__

性能测试

用以上代码进行性能测试,结果如图

56494a3601d141649655bf59aa490286.png

 测试代码

#include <time.h>
void WHashMapTest()
{
    char bufA[128];
    clock_t lastTime;
    auto GetWorkingMemory = [&]() {
        HANDLE handle = GetCurrentProcess();
        PROCESS_MEMORY_COUNTERS pmc;
        GetProcessMemoryInfo(handle, &pmc, sizeof(pmc));
        sprintf(bufA, "%dM", (int)(pmc.WorkingSetSize / 1000 / 1000));
        return bufA;
    };
    auto Clock = [&]() { clock_t current = clock(); clock_t res = current - lastTime; lastTime = current; return res; };


    wLinkedHashMap<size_t, size_t> map;
    static const size_t length = 3000000, space = 300, offset = 100000000;
    ::printf("\n=============================【  测试数据  】=============================\n");
    ::printf("插入数据量 = %d\n哈希码间距 = %d\n起始哈希码 = %d\n最大哈希码 = %d\n", length, space, offset, length * space + offset);


    ::printf("\n=============================【 第一次插入 】=============================\n");
    ::printf("插入数据前内存使用%s\n", GetWorkingMemory());
    size_t front, back;
    front = offset; back = length * space + offset;
    Clock();    //计时
    for (size_t i = length / 2; i > 0; i--)
    {
        map.put(front, front);
        map.put(back , back);
        front += space;
        back  -= space;
    }
    ::printf("插入%d次耗时%dms,内存使用%s\n", length, Clock(), GetWorkingMemory());


    ::printf("\n=============================【 第一次删除 】=============================\n");
    front = offset; back = length * space + offset;
    for (size_t i = length / 2; i > 0; i--)
    {
        map.remove(front);
        map.remove(back );
        front += space;
        back  -= space;
    }
    ::printf("删除%d次耗时%dms,内存使用%s\n", length, Clock(), GetWorkingMemory());


    ::printf("\n=============================【 第二次插入 】=============================\n");
    front = offset; back = length * space + offset;
    for (size_t i = length / 2; i > 0; i--)
    {
        map.put(front, front);
        map.put(back , back);
        front += space;
        back  -= space;
    }
    ::printf("插入%d次耗时%dms,内存使用%s\n", length, Clock(), GetWorkingMemory());


    ::printf("\n=============================【  查找测试  】=============================\n");
    size_t findCount = 0, notFindCount = 0;
    front = offset; back = length * space + offset;
    for (size_t i = length / 2; i > 0; i--)
    {
        if (map.contain(front)) findCount++; else notFindCount++;
        if (map.contain(back )) findCount++; else notFindCount++;
        front += space;
        back -= space;
    }
    ::printf("查找已存储的数据%d次耗时%dms,已找到%d\n", length, Clock(), findCount);
    front = offset; back = length * space + offset;
    for (size_t i = length / 2; i > 0; i--)
    {
        if (map.contain(front + 1)) findCount++; else notFindCount++;
        if (map.contain(back  + 1)) findCount++; else notFindCount++;
        front += space;
        back  -= space;
    }
    ::printf("查找未存储的数据%d次耗时%dms,未找到%d\n", length, Clock(), notFindCount);


    ::printf("\n=============================【 第二次删除 】=============================\n");
    front = offset; back = length * space + offset;
    for (size_t i = length / 2; i > 0; i--)
    {
        map.remove(front);
        map.remove(back);
        front += space;
        back -= space;
    }
    ::printf("删除%d次耗时%dms,内存使用%s\n", length, Clock(), GetWorkingMemory());

    ::printf("\n\n");
    system("pause");
}

 

 

 


网站公告

今日签到

点亮在社区的每一天
去签到