【C++】—— 封装底层为哈希表的unordered_map/set

190 阅读 0 评论 126 点赞

我是靠谱客的博主魔幻寒风，这篇文章主要介绍【C++】—— 封装底层为哈希表的unordered_map/set，现在分享给大家，希望可以做个参考。

之前介绍了哈希表和解决哈希冲突的两种方式

闭散列解决哈希冲突
开散列解决哈希冲突

今天介绍如何封装底层为哈希表的unordered_map/set，这里是以拉链法实现的哈希表（哈希桶）作为底层结构。

哈希表的改造

一、改造HashNode

因为哈希表不仅要给unordered_map 使用还要给unordered_set使用，因此节点中存储的元素可能为键值对pair<K,V>，也可能只是一个值value，因此我们这里存储的值给了一个泛型，若是unordered_map调用就存储键值对pair<K,V>，若是unordered_set就存储值value。

// One node of a bucket's singly linked chain.
// V is the stored element type: it may be a pair<K, V> (map usage) or a bare value (set usage).
template<class V>
struct HashNode
{
	V _valuefiled;      // the stored element
	HashNode* _next;    // next node in the same bucket, nullptr at the end of the chain

	// Copies v into the node; the node starts out unlinked.
	HashNode(const V& v)
		: _valuefiled(v), _next(nullptr)
	{
	}
};

二、改造哈希表的Insert

第一个改造是插入的值不再仅仅是键值对，而是由上层容器决定：UnorderedMap使用哈希表时传pair<K,V>，UnorderedSet使用时传value。
第二个比较关键的改造在于如何取出用来计算哈希位置和判断是否相等的key：map中存储的是键值对，需要用键值对中的第一个值key来计算位置和比较，而set直接用所存的值本身，因此这里借助了一个仿函数KeyOfValue来实现——创建一个仿函数对象，通过它的返回值拿到key，再进行哈希映射和相等比较。

	bool Insert(const V& v)
	{
		//考虑增容
		CheckCapacity();

		KeyOfValue kov;
		const K& key = kov(v);
		//size_t index = key % _table.size();
		size_t index = HashIndex(key, _table.size());

		Node* cur = _table[index];
		while (cur)
		{
			if (kov(cur->_valuefiled) == key)
				return false;
			cur = cur->_next;
		}
		//走到这说明没有相同的元素，可以进行插入，
		//由于哈希桶不规定产生冲突的序列有序,可以进行头插比较简单
		Node* newnode = new Node(v);
		newnode->_next = _table[index];
		_table[index] = newnode;

		++_size;
		return true;
	}

insert

三、构建哈希表的迭代器

在实现迭代器之前，我们先得了解一个哈希表，之前也介绍了拉链法实现的哈希表其实是一个数组存储了一系列的指针，指针又链接了每个桶里产生哈希冲突的节点。
迭代器主要是为了遍历哈希表的元素，因此最主要是要实现迭代器的operator++，主要分为两种情况，第一种情况：要是在一个桶里面，就是产生哈希冲突的节点之间，operator++只需要返回节点的_next即可， 第二种情况：需要处理的是当已经走到一个桶的最后要寻找下一个桶的位置的情况
由于我们需要处理第二种情况，所以我们需要有一个指向哈希表的指针，因此迭代器中有两个成员：一个是指向哈希节点的指针，一个是指向哈希表的指针。

// Forward declaration: the hash table needs the iterator, and the iterator in turn needs the hash table.
template<class K, class V, class KeyOfValue,class HashFunc>
class HashTable;

// Forward iterator over the elements of a HashTable.
// It keeps the current node plus a pointer to the table, because stepping past the
// last node of a bucket requires scanning the table for the next non-empty bucket.
template<class K, class V, class KeyOfValue, class HashFunc>
struct HTIterator
{
	typedef HashNode<V> Node;
	typedef HTIterator<K, V, KeyOfValue, HashFunc> Self;

	Node* _node; // current node; operator++ sets it to nullptr once no bucket is left
	HashTable<K, V, KeyOfValue,HashFunc>* _ht; // table being iterated, used to find the next bucket

	HTIterator(Node* node, HashTable<K, V, KeyOfValue,HashFunc>* ht)
		:_node(node)
		, _ht(ht)
	{}

	// Returns the element stored in the current node. Requires _node != nullptr.
	// Const-qualified so a const iterator can be dereferenced (constness is shallow).
	V& operator*() const
	{
		return _node->_valuefiled;
	}

	// Returns a pointer to the element stored in the current node. Requires _node != nullptr.
	V* operator->() const
	{
		return &_node->_valuefiled;
	}

	// Advances to the next element. Requires _node != nullptr.
	Self& operator++()
	{
		if (_node->_next)
		{
			// Still inside the current bucket's chain.
			_node = _node->_next;
		}
		else
		{
			// End of this chain: recompute the bucket of the current element and
			// look for the first non-empty bucket after it.
			const size_t bucketCount = _ht->_table.size();
			size_t i = _ht->HashIndex(KeyOfValue()(_node->_valuefiled), bucketCount) + 1;
			while (i < bucketCount && _ht->_table[i] == nullptr)
			{
				++i;
			}

			// No bucket left means the iteration is finished.
			_node = (i < bucketCount) ? _ht->_table[i] : nullptr;
		}

		return *this;
	}

	// Two iterators are equal when they refer to the same node.
	bool operator==(const Self& s) const
	{
		return _node == s._node;
	}

	// Const-qualified so const iterators can be compared as well.
	bool operator!=(const Self& s) const
	{
		return _node != s._node;
	}
};

构造UnorderedMap

#include "HashTable.h"
#include "common.h"

template<class K, class V,class HashFunc = HashFunc<K>>
class UnorderedMap
{
	struct MapKeyOfValue
	{
		const K& operator()(const pair<K, V>& _kv)
		{
			return _kv.first;
		}
	};
public:
	typedef typename HashTable<K, pair<K, V>, MapKeyOfValue, HashFunc>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}


	pair<iterator,bool> Insert(const pair<K, V>& kv)
	{
		return _ht.Insert(kv);
	}

	V& operator[](const K& key)
	{
		std::pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
		return ret.first->second;
	}

private:
	HashTable<K, pair<K, V>, MapKeyOfValue,HashFunc> _ht;
};

构造UnorderedSet

#include "HashTable.h"
#include "common.h"

template<class K,class HashFunc = HashFunc<K>>
class UnorderedSet
{
	struct SetKeyOfValue
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	typedef typename HashTable<K, K, SetKeyOfValue, HashFunc>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}

	pair<iterator,bool> Insert(const K& key)
	{
		return _ht.Insert(key);
	}
private:
	HashTable<K, K, SetKeyOfValue,HashFunc> _ht;
};

HashFunc模板参数

上述代码中不止一次出现了HashFunc，读者肯定有所困惑，下面就解释一下HashFunc的作用。
我们之前一直没有对这个模板参数进行详细的讲解，这里单独将它拿出来进行探讨。首先读者们应该明白，要将数据映射到哈希表中，K值必须是一个可以取模的值。那么现在问题来了：我们之前在使用unordered_map/set时发现string也可以进行映射，但是我们能对string直接进行取模吗？答案当然是不能，所以这里我们就要借助HashFunc这个仿函数，先把string类型转换成一个可以取模的整数。

字符串哈希算法

那么说到这里，这个字符串哈希算法应该怎么实现呢？

情景1：将字符串的第一个字符作为K映射。缺点：以同一个字符开头的字符串全部都被映射在同一个位置。
情景2：将字符串中所有字符的ASCII码值加起来作为K映射。缺点：“abcd”、“acdb”、“adbc”……都被映射在同一个位置。

由此可见上述两种情景都不是最好的解决办法，因此可以参考已有的字符串哈希函数。
我们可以通过下面的代码解决string的哈希映射问题

template<>
struct HashFunc<string>
{
	size_t operator()(const string& s)
	{
		size_t hash = 0;
		for (size_t i = 0; i < s.size(); ++i)
		{
			hash = hash * 131 + s[i];
		}
		return hash;
	}
};

看到这里可能有人会疑惑，既然有了HashFunc这个模板参数了，那我们平时使用unordered_map/set的时候为什么没有传这个参数就可以直接使用了呢？这里就体现了缺省参数和模板特化的强大了

哈希源码

HashTable.h

#pragma once

//开散列解决哈希冲突，哈希桶、拉链法
#include <iostream>
#include <vector>
using namespace std;

// One node of a bucket's singly linked chain.
// V is the stored element type: it may be a pair<K, V> (map usage) or a bare value (set usage).
template<class V>
struct HashNode
{
	V _valuefiled;      // the stored element
	HashNode* _next;    // next node in the same bucket, nullptr at the end of the chain

	// Copies v into the node; the node starts out unlinked.
	HashNode(const V& v)
		: _valuefiled(v), _next(nullptr)
	{
	}
};

// Forward declaration: the hash table needs the iterator, and the iterator in turn needs the hash table.
template<class K, class V, class KeyOfValue,class HashFunc>
class HashTable;

// Forward iterator over the elements of a HashTable.
// It keeps the current node plus a pointer to the table, because stepping past the
// last node of a bucket requires scanning the table for the next non-empty bucket.
template<class K, class V, class KeyOfValue, class HashFunc>
struct HTIterator
{
	typedef HashNode<V> Node;
	typedef HTIterator<K, V, KeyOfValue, HashFunc> Self;

	Node* _node; // current node; operator++ sets it to nullptr once no bucket is left
	HashTable<K, V, KeyOfValue,HashFunc>* _ht; // table being iterated, used to find the next bucket

	HTIterator(Node* node, HashTable<K, V, KeyOfValue,HashFunc>* ht)
		:_node(node)
		, _ht(ht)
	{}

	// Returns the element stored in the current node. Requires _node != nullptr.
	// Const-qualified so a const iterator can be dereferenced (constness is shallow).
	V& operator*() const
	{
		return _node->_valuefiled;
	}

	// Returns a pointer to the element stored in the current node. Requires _node != nullptr.
	V* operator->() const
	{
		return &_node->_valuefiled;
	}

	// Advances to the next element. Requires _node != nullptr.
	Self& operator++()
	{
		if (_node->_next)
		{
			// Still inside the current bucket's chain.
			_node = _node->_next;
		}
		else
		{
			// End of this chain: recompute the bucket of the current element and
			// look for the first non-empty bucket after it.
			const size_t bucketCount = _ht->_table.size();
			size_t i = _ht->HashIndex(KeyOfValue()(_node->_valuefiled), bucketCount) + 1;
			while (i < bucketCount && _ht->_table[i] == nullptr)
			{
				++i;
			}

			// No bucket left means the iteration is finished.
			_node = (i < bucketCount) ? _ht->_table[i] : nullptr;
		}

		return *this;
	}

	// Two iterators are equal when they refer to the same node.
	bool operator==(const Self& s) const
	{
		return _node == s._node;
	}

	// Const-qualified so const iterators can be compared as well.
	bool operator!=(const Self& s) const
	{
		return _node != s._node;
	}
};

//由于之后要用哈希桶封装unordered_map/set，因此这里可能存pair<K,V>也可能存value
//由于pair<K,V>存储比较必须取key才能比较，因此传仿函数取key
//传HashFunc是为了将一个不能直接取模的key类型，eg：string
template<class K, class V, class KeyOfValue, class HashFunc>
class HashTable
{
	typedef HashNode<V> Node;
public:
	template<class K, class V, class KeyOfValue, class HashFunc>
	friend struct HTIterator;
	typedef HTIterator<K, V, KeyOfValue,HashFunc> iterator;

	HashTable()
		:_size(0)
	{}

	iterator begin()
	{
		for (size_t i = 0; i < _table.size(); ++i)
		{
			if (_table[i] != nullptr)
			{
				return iterator(_table[i], this);
			}
		}

		return iterator(nullptr, this);
	}

	iterator end()
	{
		return iterator(nullptr, this);
	}

	pair<iterator,bool> Insert(const V& v)
	{
		//考虑增容
		CheckCapacity();

		KeyOfValue kov;
		const K& key = kov(v);
		//size_t index = key % _table.size();
		size_t index = HashIndex(key, _table.size());

		Node* cur = _table[index];
		while (cur)
		{
			if (kov(cur->_valuefiled) == key)
				return make_pair(iterator(cur, this), false);
			cur = cur->_next;
		}
		//走到这说明没有相同的元素，可以进行插入，
		//由于哈希桶不规定产生冲突的序列有序,可以进行头插比较简单
		Node* newnode = new Node(v);
		newnode->_next = _table[index];
		_table[index] = newnode;

		++_size;
		return make_pair(iterator(newnode,this),true);
	}

	void CheckCapacity()
	{
		//当负载因子==1时扩容
		if (_table.size() == _size)
		{
			size_t newsize = _table.size() == 0 ? 10 : _table.size() * 2;
			//这里不像之前开散列一样创建一个新的哈希表
			//再调用Insert的原因是旧表的节点可以直接拿到新的vector数组中进行插入
			vector<Node*> newtable;
			newtable.resize(newsize);

			//遍历旧表，在新的vector数组中找到对应位置，将旧表节点插入
			for (size_t i = 0; i < _table.size(); ++i)
			{
				Node* cur = _table[i];
				//将节点从旧表中拆出来，再重新计算节点在新表中的位置进行插入
				while (cur)
				{
					Node* next = cur->_next;
					//size_t index = KeyOfValue()(cur->_valuefiled) % newsize;
					size_t index = HashIndex(KeyOfValue()(cur->_valuefiled), newsize);
					//头插入新表中
					cur->_next = newtable[index];
					newtable[index] = cur;

					cur = next;
				}
				//将原来的表置空
				_table[i] = nullptr;
			}
			//交换新旧两标的资源，出作用域后新表自动调用析构函数释放旧表资源
			_table.swap(newtable);
		}
	}

	Node* Find(const K& key)
	{
		//size_t index = key % _table.size();
		size_t index = HashIndex(key, _table.size());
		Node* cur = _table[index];
		while (cur)
		{
			if (KeyOfValue()(cur->_valuefiled) == key)
				return cur;

			cur = cur->_next;
		}
		return nullptr;
	}

	bool Erase(const K& key)
	{
		//size_t index = key % _table.size();
		size_t index = HashIndex(key, _table.size());
		Node* cur = _table[index];
		Node* prev = nullptr;

		while (cur)
		{
			if (KeyOfValue()(cur->_valuefiled) == key)
			{
				if (prev == nullptr)//头删
					_table[index] = cur->_next;
				else
					prev->_next = cur->_next;

				delete cur;
				--_size;
				return true;
			}
		}
		return false;
	}

	size_t HashIndex(const K& key, size_t size)
	{
		HashFunc hf;
		return hf(key) % size;
	}
private:
	vector<Node*> _table;//vector中的size为哈希表的大小
	size_t _size;//哈希表中存储的有效元素的个数
};

UnorderedMap.h

#include "HashTable.h"
#include "common.h"

template<class K, class V,class HashFunc = HashFunc<K>>
class UnorderedMap
{
	struct MapKeyOfValue
	{
		const K& operator()(const pair<K, V>& _kv)
		{
			return _kv.first;
		}
	};
public:
	typedef typename HashTable<K, pair<K, V>, MapKeyOfValue, HashFunc>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}


	pair<iterator,bool> Insert(const pair<K, V>& kv)
	{
		return _ht.Insert(kv);
	}

	V& operator[](const K& key)
	{
		std::pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
		return ret.first->second;
	}

private:
	HashTable<K, pair<K, V>, MapKeyOfValue,HashFunc> _ht;
};

void TestUnorderedMap()
{
	UnorderedMap<int, int> m;
	m.Insert(std::make_pair(15, 15));
	m.Insert(std::make_pair(5, 5));
	m.Insert(std::make_pair(25, 25));
	m[7] = 6;

	for (size_t i = 0; i < 11; ++i)
	{
		m.Insert(make_pair(i, i));
	}

	UnorderedMap<int, int>::iterator it = m.begin();
	while (it != m.end())
	{
		cout << it->first << ":" << it->second << endl;
		++it;
	}
}

void TestUnorderedMap2()
{
	UnorderedMap<string, string> sm;
	sm.Insert(make_pair(string("sort"), string("排序")));
	sm.Insert(make_pair(string("left"), string("左边")));
	sm.Insert(make_pair(string("string"), string("字符串")));
	sm.Insert(make_pair(string("insert"), string("插入")));

	for (auto& kv : sm)
	{
		cout << kv.first << ":" << kv.second << endl;
	}
}

UnorderedSet

#include "HashTable.h"
#include "common.h"

template<class K,class HashFunc = HashFunc<K>>
class UnorderedSet
{
	struct SetKeyOfValue
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	typedef typename HashTable<K, K, SetKeyOfValue, HashFunc>::iterator iterator;

	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}

	pair<iterator,bool> Insert(const K& key)
	{
		return _ht.Insert(key);
	}
private:
	HashTable<K, K, SetKeyOfValue,HashFunc> _ht;
};

void TestUnorderedSet()
{
	UnorderedSet<int> s;
	s.Insert(1);
	s.Insert(5);
	s.Insert(3);
	s.Insert(10);
	s.Insert(5);
	s.Insert(19);
	s.Insert(3);
	s.Insert(2);
	s.Insert(8);

	for (auto& e : s)
	{
		cout << e << " ";
	}
	cout << endl;
}

common.h//HashFunc

#pragma once
#include <iostream>
#include <string>

using namespace std;
// Default hash functor: returns the key unchanged.
// NOTE(review): the result is presumably fed to operator% by the hash table, so this
// primary template only suits keys that support that; other key types need a specialization.
template<class K>
struct HashFunc
{
	// Const-qualified so the functor can also be invoked through a const object.
	const K& operator()(const K& key) const
	{
		return key;
	}
};

template<>
struct HashFunc<string>
{
	size_t operator()(const string& s)
	{
		size_t hash = 0;
		for (size_t i = 0; i < s.size(); ++i)
		{
			hash = hash * 131 + s[i];
		}
		return hash;
	}
};