当前位置：首页 > news >正文

C++之unordered_map，unordered_set模拟实现

news 2025/7/14 23:02:59

unordered_map，unordered_set模拟实现

哈希表源代码
哈希表模板参数的控制
仿函数增加
正向迭代器实现
- *运算符重载
- ->运算符重载
- ++运算符重载
- != 和 == 运算符重载
- begin()与end()实现
unordered_set实现
unordered_map实现
map/set 与 unordered_map/unordered_set对比
哈希表调整后代码

哈希表源代码

template<class K>
struct HashFunc
{size_t operator()(const K& key){//所有类型都强转为size_t类型return (size_t)key;}
};//模板特化
template<>
struct HashFunc<string>
{size_t operator()(const string& key){size_t val = 0;for (auto ch : key){val *= 131;val += ch;}return val;}
};
namespace HashBucket
{template<class K, class V>struct HashNode{pair<K, V> _kv;HashNode<K, V>* _next;//构造函数HashNode(const pair<K, V>& kv):_kv(kv),_next(nullptr){}};template<class K, class V, class Hash = HashFunc<K>>class HashTable{typedef HashNode<K, V> Node;public:~HashTable(){for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->_next;delete cur;cur = next;}_tables[i] = nullptr;}}inline size_t __stl_next_prime(size_t n){static const size_t __stl_num_primes = 28;static const size_t __stl_prime_list[__stl_num_primes] ={53, 97, 193, 389, 769,1543, 3079, 6151, 12289, 24593,49157, 98317, 196613, 393241, 786433,1572869, 3145739, 6291469, 12582917, 25165843,50331653, 100663319, 201326611, 402653189, 805306457,1610612741, 3221225473, 4294967291};for (size_t i = 0; i < __stl_num_primes; ++i){if (__stl_prime_list[i] > n){return __stl_prime_list[i];}}return -1;}bool Insert(const pair<K, V>& kv){//如果该键值对存在，就返回falseif (Find(kv.first)){return false;}Hash hash;//如果负载因子为1就扩容if (_size == _tables.size()){//创建一个新的哈希表vector<Node*> newTables;size_t newSizes = _size == 0 ? 10 : 2 * _tables.size();//将每个元素初始化为空newTables.resize(__stl_next_prime(_tables.size()), nullptr);//将旧表结点插入到新表当中for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){//记录cur的下一个结点Node* next = cur->_next;//计算相应的哈希桶编号size_t hashi = hash(cur->_kv.first) % newTables.size();//将旧表结点移动值新表cur->_next = newTables[hashi];newTables[hashi] = cur;cur = next;}_tables[i] = nullptr;}_tables.swap(newTables);}//计算哈希桶编号size_t hashi = hash(kv.first) % _tables.size();//插入结点Node* newnode = new Node(kv);newnode->_next = _tables[hashi];_tables[hashi] = newnode;//元素个数++_size++;return true;}//查找Node* Find(const K& key){//哈希表为空就返回空if (_tables.size() == 0){return nullptr;}Hash hash;//计算哈希地址size_t hashi = hash(key) % _tables.size();Node* cur = _tables[hashi];//遍历哈希桶while (cur){if ((cur->_kv.first) == key){return cur;}cur = cur->_next;}return nullptr;}//删除bool Erase(const K& key){//哈希表大小为0，删除失败if (_tables.size() == 0){return false;}Hash hash;//计算哈希地址size_t hashi = hash(key) % _tables.size();Node* prev = nullptr;Node* cur = _tables[hashi];//遍历哈希桶，寻找删除结点是否存在while (cur){if (hash(hash(cur->_kv.first)) == key){if (prev){prev->_next = cur->_next;}else{_tables[hashi] = cur->_next;}//删除该结点delete cur;_size--;return true;}prev = cur;cur = cur->_next;}//删除结点不存在，返回falsereturn false;}size_t Size(){return _size;}size_t TableSize(){return _tables.size();}size_t BucketNum(){size_t num = 0;for (size_t i = 0; i < _tables.size(); i++){if (_tables[i]){num++;}}return num;}private:vector<Node*> _tables;size_t _size = 0;};
}

哈希表模板参数的控制

unordered_set属于K模型，unordered_map属于KV模型，但是在底层上我们都是用一个哈希表来实现的，所以我们需要将哈希表的第二个参数设置为T。

template<class K, class T>struct HashNode{T _data;HashNode<T>* _next;//构造函数HashNode(const T& data):_data(data),_next(nullptr){}};template<class K, class T, class Hash = HashFunc<K>>class HashTable{typedef HashNode<T> Node;public://......private:vector<Node*> _tables;size_t _size = 0;
};

T模板参数可能只是键值Key，也可能是由Key和Value共同构成的键值对。如果是unordered_set容器，那么它传入底层红黑树的模板参数就是Key和Key：

template<class K, class Hash = HashFunc<K>>
class unorder_set
{
public://...
private:HashBucket::HashTable<K, K, Hash> _ht;
};

如果是unordered_map容器，那么它传入底层红黑树的模板参数就是Key和Value：

template<class K, class V, class Hash = HashFunc<K>>
class unorder_map
{
public://...
private:HashBucket::HashTable<K, pair<K, V>, Hash> _ht;
};

在这里插入图片描述

仿函数增加

对于unordered_set容器，我们需要进行键值比较就是对key值进行比较，也就是直接比较T就可以了，但是对于unordered_map容器来说，我们需要比较的是键值对<key，value>中的key，我们需要先将key提取出来，在进行比较。

所以，我们需要在上层unordered_set和unordered_map中各提供一个仿函数，根据传入的T类型分开进行比较操作：

map仿函数：

template<class K, class V, class Hash = HashFunc<K>>
class unorder_map
{struct MapKeyOfT{const K& operator()(const pair<K, V>& kv){return kv.first;}};
public://...
private:HashBucket::HashTable<K, pair<K, V>, Hash, MapKeyOfT> _ht;
};

set仿函数：

template<class K, class Hash = HashFunc<K>>
class unorder_set
{struct SetKeyOfT{const K& operator()(const K& key){return key;}};
public://...
private:HashBucket::HashTable<K, K, Hash, SetKeyOfT> _ht;
};

在这里插入图片描述

正向迭代器实现

哈希表只存在正向迭代器，哈希表的正向迭代器实际上是对整个哈希表进行了封装：

//前置声明
template<class K, class T, class Hash, class KeyOfT>
class HashTable;template<class K, class T, class Hash, class KeyOfT>
struct __HashIterator
{typedef HashNode<T> Node;typedef HashTable <K, T, Hash, KeyOfT> HT;typedef __HashIterator <K, T, Hash, KeyOfT> Self;Node* _node;HT* _pht;
}

*运算符重载

解引用操作就是返回单链表某个结点的数据：

T& operator*()
{return _node->_data;
}

->运算符重载

->操作就是返回数据的地址：

T* operator->()
{return &_node->_data;
}

++运算符重载

哈希表中++其实就是寻找当前哈希桶中的该结点下一个结点，如果一个哈希桶中已经寻找完，就去下一个哈希桶中进行寻找，直到找到为止；

代码如下：

Self& operator++()
{//寻找该结点下一个结点点if (_node->_next){//下一个结点不为空，就指向下一个结点_node = _node->_next;}else{Hash hash;KeyOfT kot;//为空就计算该哈希桶所处位置的哈希地址size_t i = hash(kot(_node->_data)) % _pht->_tables.size();//地址++就计算出下一个桶的位置i++;//继续循环寻找for (; i < _pht->_tables.size(); i++){if (_pht->_tables[i]){_node = _pht->_tables[i];break;}}//找完整个哈希表，就指向nullptrif (i == _pht->_tables.size()){_node = nullptr;}}return *this;
}

!= 和 == 运算符重载

!= 和 ==就是判断是不是同一个结点：

//!=
bool operator!=(const Self& s)
{return _node != s._node;
}
//==
bool operator==(const Self& s)
{return _node == s._node;
}

begin()与end()实现

begin函数返回哈希表当中第一个不为nullptr位置的正向迭代器。
end函数返回哈希表当中最后一个位置下一个位置的正向迭代器，这里直接用空指针构造一个正向迭代器。

class HashTable
{typedef HashNode<T> Node;template<class K, class T, class Hash, class KeyOfT>friend struct __HashIterator;
public:typedef __HashIterator <K, T, Hash, KeyOfT> iterator;iterator begin(){//从前往后遍历整个数组for (size_t i = 0; i < _tables.size(); i++){//找到不为空的位置并返回该位置迭代器if (_tables[i]){return iterator(_tables[i], this);}}//最后返回end();return end();}iterator end(){//返回一个为空的位置的迭代器return iterator(nullptr, this);}
}

unordered_set实现

template<class K, class Hash = HashFunc<K>>
class unordered_set
{struct SetKeyOfT{const K& operator()(const K& key){return key;}};
public:typedef typename HashBucket::HashTable <K, K, Hash, SetKeyOfT>::iterator iterator;iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}pair<iterator, bool> insert(const K& key){return _ht.Insert(key);}
private:HashBucket::HashTable<K, K, Hash, SetKeyOfT> _ht;
};

unordered_map实现

template<class K, class V, class Hash = HashFunc<K>>
class unordered_map
{struct MapKeyOfT{const K& operator()(const pair<K, V>& kv){return kv.first;}};
public:typedef typename HashBucket::HashTable <K, pair<K, V>, Hash, MapKeyOfT>::iterator iterator;iterator begin(){return _ht.begin();}iterator end(){return _ht.end();}pair<iterator, bool> insert(const K& key){_ht.Insert(key);}V& operator[](const K& key){pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));return ret.first->second;}
private:HashBucket::HashTable<K, pair<K, V>, Hash, MapKeyOfT> _ht;
};

map/set 与 unordered_map/unordered_set对比

map/set 底层是使用红黑树实现的，unordered_map/unordered_set底层是用哈希表进行实现的，两者的底层实现是不同的，对于少量的数据，他们的增删查改没有区别，但是对于大量的数据unordered系列是要更胜一筹的，特别是对于查找来说，unordered系列基本可以一直保持高效率；

哈希表调整后代码

#pragma oncetemplate<class K>
struct HashFunc
{size_t operator()(const K& key){//所有类型都强转为size_t类型return (size_t)key;}
};//模板特化
template<>
struct HashFunc<string>
{size_t operator()(const string& key){size_t val = 0;for (auto ch : key){val *= 131;val += ch;}return val;}
};namespace HashBucket
{template<class T>struct HashNode{T _data;HashNode<T>* _next;//构造函数HashNode(const T& data):_data(data),_next(nullptr){}};//前置声明template<class K, class T, class Hash, class KeyOfT>class HashTable;template<class K, class T, class Hash, class KeyOfT>struct __HashIterator{typedef HashNode<T> Node;typedef HashTable <K, T, Hash, KeyOfT> HT;typedef __HashIterator <K, T, Hash, KeyOfT> Self;Node* _node;HT* _pht;//构造函数__HashIterator(Node* node, HT* pht):_node(node),_pht(pht){}T& operator*(){return _node->_data;}T* operator->(){return &_node->_data;}Self& operator++(){//寻找该结点下一个结点点if (_node->_next){//下一个结点不为空，就指向下一个结点_node = _node->_next;}else{Hash hash;KeyOfT kot;//为空就计算该哈希桶所处位置的哈希地址size_t i = hash(kot(_node->_data)) % _pht->_tables.size();//地址++就计算出下一个桶的位置i++;//继续循环寻找for (; i < _pht->_tables.size(); i++){if (_pht->_tables[i]){_node = _pht->_tables[i];break;}}//找完整个哈希表，就指向nullptrif (i == _pht->_tables.size()){_node = nullptr;}}return *this;}bool operator!=(const Self& s){return _node != s._node;}bool operator==(const Self& s){return _node == s._node;}};template<class K, class T, class Hash, class KeyOfT>class HashTable{typedef HashNode<T> Node;template<class K, class T, class Hash, class KeyOfT>friend struct __HashIterator;public:typedef __HashIterator <K, T, Hash, KeyOfT> iterator;iterator begin(){for (size_t i = 0; i < _tables.size(); i++){if (_tables[i]){return iterator(_tables[i], this);}}return end();}iterator end(){return iterator(nullptr, this);}~HashTable(){for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->_next;delete cur;cur = next;}_tables[i] = nullptr;}}inline size_t __stl_next_prime(size_t n){static const size_t __stl_num_primes = 28;static const size_t __stl_prime_list[__stl_num_primes] ={53, 97, 193, 389, 769,1543, 3079, 6151, 12289, 24593,49157, 98317, 196613, 393241, 786433,1572869, 3145739, 6291469, 12582917, 25165843,50331653, 100663319, 201326611, 402653189, 805306457,1610612741, 3221225473, 4294967291};for (size_t i = 0; i < __stl_num_primes; ++i){if (__stl_prime_list[i] > n){return __stl_prime_list[i];}}return -1;}pair<iterator, bool> Insert(const T& data){Hash hash;KeyOfT kot;//如果该键值对存在，就返回falseiterator ret = Find((kot(data)));if (ret != end()){return make_pair(ret, false);}//如果负载因子为1就扩容if (_size == _tables.size()){//创建一个新的哈希表vector<Node*> newTables;size_t newSizes = _size == 0 ? 10 : 2 * _tables.size();//将每个元素初始化为空newTables.resize(__stl_next_prime(_tables.size()), nullptr);//将旧表结点插入到新表当中for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){//记录cur的下一个结点Node* next = cur->_next;//计算相应的哈希桶编号size_t hashi = hash(kot(cur->_data)) % newTables.size();//将旧表结点移动值新表cur->_next = newTables[hashi];newTables[hashi] = cur;cur = next;}_tables[i] = nullptr;}_tables.swap(newTables);}//计算哈希桶编号size_t hashi = hash(kot(data)) % _tables.size();//插入结点Node* newnode = new Node(data);newnode->_next = _tables[hashi];_tables[hashi] = newnode;//元素个数++_size++;return make_pair(iterator(newnode, this), true);}//查找iterator Find(const K& key){//哈希表为空就返回空if (_tables.size() == 0){return end();}Hash hash;KeyOfT kot;//计算哈希地址size_t hashi = hash(key) % _tables.size();Node* cur = _tables[hashi];//遍历哈希桶while (cur){if (kot(cur->_data) == key){return iterator(cur, this);}cur = cur->_next;}return end();}//删除bool Erase(const K& key){//哈希表大小为0，删除失败if (_tables.size() == 0){return false;}Hash hash;//计算哈希地址size_t hashi = hash(key) % _tables.size();Node* prev = nullptr;Node* cur = _tables[hashi];//遍历哈希桶，寻找删除结点是否存在while (cur){if (hash(kot(cur->_data)) == key){if (prev){prev->_next = cur->_next;}else{_tables[hashi] = cur->_next;}//删除该结点delete cur;_size--;return true;}prev = cur;cur = cur->_next;}//删除结点不存在，返回falsereturn false;}private:vector<Node*> _tables;size_t _size = 0;};
}