[c++]——用哈希表封装unordered_map/set用哈希表封装unordered_map/set总结

74 阅读 0 评论 49 点赞

我是靠谱客的博主包容寒风，这篇文章主要介绍[c++]——用哈希表封装unordered_map/set用哈希表封装unordered_map/set总结，现在分享给大家，希望可以做个参考。

用哈希表封装unordered_map/set

上一篇博客我们讲了哈希表(戳这里查看上一篇博客)，这篇博客我们来用已经实现的哈希表来封装出unordered_map/set，我们以前讲过用红黑树封装map/set，其实这里也是换汤不换药，原理基本一模一样，只不过是哈希表有4个模板参数，通过这篇博客，你对哈希表的4个模板参数一定会有新的理解。

unordered_map

同样我们用unordered_map来举例，会了unordered_map你也肯定会unordered_set了，这里我们先来看看Umap(简写一下)的基本框架：

template<class K, class V, class HashFunc = HashFunc<K>>
class UnorderedMap
{
	typedef pair<K, V> ValueType;
	struct MapKeyOfValue
	{
		const K& operator()(const ValueType& v)
		{
			return v.first;
		}
	};
public:
			//...
private:
	HashTable<K, ValueType, MapKeyOfValue, HashFunc> ht;
};

我们把哈希表的4个模板参数也拿出来看看：

template<class K, class V, class KeyOfValue, class HashFunc>
class HashTable

class K：很容易理解，K就是传给哈希表用来映射取模的key
class V：对于Umap来说V是要映射到哈希表的值，但是对于HashTable来说ValueType可能是pair类型也可能是K类型
class KeyOfValue：这里是取K值的仿函数，如果是pair<K,V>类型就需要返回K(MapKeyOfValue)
class HashFunc：这个模板参数非常重要，他也是一个仿函数，他的作用是如果遇到无法取模的类型(string…)帮助我们转化成数字,我们后面单独介绍

所以调用者与UnorderedMap和哈希表的关系如下

在这里插入图片描述
如果还有小伙伴还是没有明白关系介意戳这里用红黑树封装map/set，笔者的这篇博客最后有一张非常大的图就是梳理这里封装的关系的，用红黑树封装map/set和这里原理完全相同。

补全UnorderedMap的其他功能

这里Umap所有的接口最后都是调用哈希表的所以我们直接补全就行：

#pragma once
#include"HashTable.h"
#include"common.h"

template<class K, class V, class HashFunc = HashFunc<K>>
class UnorderedMap
{
	typedef pair<K, V> ValueType;
	struct MapKeyOfValue
	{
		const K& operator()(const ValueType& v)
		{
			return v.first;
		}
	};
public:
	typedef typename HashTable<K, ValueType, MapKeyOfValue, HashFunc>::iterator iterator;

	pair<iterator, bool> Insert(const ValueType& v)
	{
		return ht.Insert(v);
	}

	iterator begin()
	{
		return ht.begin();
	}

	iterator end()
	{
		return ht.end();
	}

	V& operator[](const K& key)//与map的[]实现原理完全相同
	{
		pair<iterator, bool> ret = ht.Insert(make_pair(key, V()));
		return ret.first->second;
	}

private:
	HashTable<K, ValueType, MapKeyOfValue, HashFunc> ht;
};

HashFunc模板参数

我们之前一只没有对这个模板参数进行详细的讲解，这里我们单独将他拿出来进行探讨，首先读者们应该明白要将数据映射到哈希表中，K值必须是一个可以取模的值，那么现在问题来了，我们之前在使用unordered_map/set时我们发现string也可以进行映射，但是我们能对string直接进行取模呢？答案当然是不能，所以这里我们就要借助HashFunc这个仿函数对string类型进行取模。

字符串哈希算法
那么说到这里，这个字符串哈希算法应该怎么实现呢？

情景1：将字符串的第一个字符作为K映射缺点：那么以某个字符开头的字符串全部都被映射在同一个位置
情景2：将字符串中的所有字符asll值加起来作为K映射缺点：“abcd”,“acdb”,“adbc”…都被映射在同一个位置

所以上面的情景并不是最好的映射方式，经过网络上查找以及搜索找到了下面效率最高的算法，参考字符串哈希函数：

		size_t hash = 0;
		for (size_t i = 0; i < s.size(); i++)
		{
			hash = hash * 131 + s[i];//给每个字符的asll值乘以131，这个131叫做累乘因子
		}
		return hash;

所以第4个模板参数的仿函数实现如下：

string的哈希函数相当于时普通哈希函数的一个特化（这时就体现出了语法的强大，不然使用string作为K每次都需要传仿函数）

#pragma once
template<class K>
struct HashFunc
{
	const K& operator()(const K& key)
	{
		return key;
	}
};

template<>
struct HashFunc<std::string>
{
	size_t operator()(const std::string& s)
	{
		size_t hash = 0;
		for (size_t i = 0; i < s.size(); i++)
		{
			hash = hash * 131 + s[i];
		}
		return hash;
	}
};

unordered_set

unordered_set的实现原理也完全相同，直接上代码：

我们发现set也需要相同的HashFunc函数，所以我们将上面的哈希函数直接放到common.h即可

#pragma once
#include"HashTable.h"
#include"common.h"

template<class K, class HashFunc = HashFunc<K>>
class UnorderedSet
{
public:
	typedef K ValueType;
	struct SetKeyOfValue
	{
		const K& operator()(const ValueType& v)
		{
			return v;
		}
	};
	typedef typename HashTable<K, ValueType, SetKeyOfValue, HashFunc>::iterator iterator;

	pair<iterator, bool> Insert(const ValueType& v)
	{
		return ht.Insert(v);
	}

	iterator begin()
	{
		return ht.begin();
	}

	iterator end()
	{
		return ht.end();
	}
private:
	HashTable<K, ValueType, SetKeyOfValue, HashFunc> ht;
};

总结

要封装出自己的unordered_map/set首先要明了哈希表底层的原理，要清楚他们直接是如何只使用一个哈希表即封装出K的模型又封装出了K-V的模型，而且哈希表第四个模板参数也是一个非常重要的知识点。这就是我们本篇博客的内容，有什么问题大家尽管指出。

附录（哈希表源码）

HashTable.h

#pragma once
#include<iostream>
#include<vector>
#include<string>
using namespace std;

template<class V>
struct HashNode
{
	V _Value;
	HashNode<V>* _next;
	HashNode(const V& v)
		:_Value(v)
		, _next(nullptr)
	{}
};

template<class K, class V, class KeyOfValue,class HashFunc>//前置声名
class HashTable;

template<class K, class V, class KeyOfValue, class HashFunc>
struct HTIterator
{
	typedef HashNode<V> Node;
	typedef HTIterator<K, V, KeyOfValue, HashFunc> self;
	Node* _node;
	HashTable<K, V, KeyOfValue, HashFunc>* _ht;//需要哈希表指针来寻找迭代器下一个要到达的位置

	HTIterator(Node* node, HashTable<K, V, KeyOfValue, HashFunc>* ht)
		: _node(node)
		, _ht(ht)
	{}

	self operator++()
	{
		if (_node->_next)//直接指向下一给
		{
			_node = _node->_next;
		}
		else
		{
			//size_t index = KeyOfValue()(_node->_Value) % (_ht->_table.size());//找到当前所在表的位置
			size_t index = _ht->GetIndex(KeyOfValue()(_node->_Value), _ht->_table.size());
			++index;

			for (; index < _ht->_table.size(); index++)
			{
				if (_ht->_table[index] != nullptr)
				{
					_node = _ht->_table[index];//寻找表的下一个位置
					break;
				}
			}
			if (index == _ht->_table.size())//如果已经走到结尾，那么直接将迭代器置空
			{
				_node = nullptr;
			}
		}

		return *this;
	}

	V& operator*()
	{
		return _node->_Value;
	}

	V* operator->()
	{
		return &_node->_Value;
	}

	bool operator!=(const self& s)
	{
		return _node != s._node;
	}
};

template<class K, class V, class KeyOfValue, class HashFunc>
class HashTable
{
	typedef HashNode<V> Node;
	template<class K, class V, class KeyOfValue, class HashFunc>//模板类型的迭代器
	friend struct HTIterator;
public:
	typedef HTIterator<K, V, KeyOfValue, HashFunc> iterator;
	HashTable()
		:_size(0)
	{}

	iterator begin()
	{
		for (int i = 0; i < _table.size(); i++)
		{
			if (_table[i] != nullptr)
			{
				return iterator(_table[i], this);//this实际上就是哈希表的指针
			}
		}
		return iterator(nullptr, this);//走到这里说明此时这个哈希表为空
	}

	iterator end()
	{
		return iterator(nullptr, this);
	}

	void CheckCapacity()
	{
		if (_size == _table.size())
		{
			size_t newsize = _table.size() == 0 ? 10 : _table.size() * 2;
			vector<Node*> _newht;
			_newht.resize(newsize);
			for (int i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;
					//size_t index = KeyOfValue()(cur->_Value) % _newht.size();
					size_t index = GetIndex(KeyOfValue()(cur->_Value) , _newht.size());
					cur->_next = _newht[index];
					_newht[index] = cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
			_table.swap(_newht);
		}
	}	

	pair<iterator,bool> Insert(const V& v)
	{
		CheckCapacity();
		KeyOfValue kov;
		//size_t index = kov(v)%_table.size();
		size_t index = GetIndex(KeyOfValue()(v), _table.size());
		Node* cur = _table[index];
		while (cur)
		{
			if (kov(cur->_Value) == kov(v))//表示表中已经存在了此数据
				return make_pair(iterator(cur,this),false);
			cur = cur->_next;
		}

		Node* newnode = new Node(v);//此处进行头插，相当于对数组的赋值
		newnode->_next = _table[index];
		_table[index] = newnode;
		++_size;

		return make_pair(iterator(newnode, this), true);//注意这里构造的迭代器需要传this
	}

	size_t GetIndex(const K& key, size_t size)
	{
		HashFunc hf;
		return hf(key) % size;
	}
private:
	vector<Node*> _table;
	size_t _size;
};

common.h

#pragma once
template<class K>
struct HashFunc
{
	const K& operator()(const K& key)
	{
		return key;
	}
};

template<>
struct HashFunc<std::string>
{
	size_t operator()(const std::string& s)
	{
		size_t hash = 0;
		for (size_t i = 0; i < s.size(); i++)
		{
			hash = hash * 131 + s[i];
		}
		return hash;
	}
};