当前位置：首页 > news >正文

unordered_map和unordered_set特性以及解决哈希冲突

news 2025/7/25 11:21:11

1. 哈希表的特性及核心概念

1. 哈希表的特性及核心概念

哈希表（Hash Table）是一种高效的键值对存储结构，通过哈希函数建立键与存储位置的映射关系，实现平均 O (1) 时间复杂度的插入、查找和删除操作。

哈希函数（Hash Function）
将任意大小的输入（键值）映射到固定范围的输出（哈希地址）的函数。理想的哈希函数应具有：
- 确定性：同一键值始终映射到同一地址
- 均匀性：键值分布均匀，减少冲突
- 高效性：计算快速，时间复杂度为 O (1)
哈希冲突（Hash Collision）
不同键值通过哈希函数得到相同相同哈希地址的现象。解决方法主要有：
- 链地址法（Separate 链接法）：每个哈希地址对应一个链表 / 红黑树，冲突元素依次存储在链表中
- 开放定址法：冲突发生时，通过线性探测、二次探测等方式寻找下一个空闲位置
- 再哈希法：使用多个哈希函数，冲突时切换函数重新计算地址
负载因子（Load Factor）
哈希表中元素数量与桶（Bucket）数量的比值，计算公式：负载因子 = 元素数 / 桶数。负载因子过大会导致冲突率上升，通常当负载因子超过阈值（如 0.7）时触发扩容。
扩容（Resizing）
当负载因子超标时，创建更大的桶数组（通常为原大小的 2 倍或素数倍），并将所有元素重新哈希到新桶中，以降低冲突率。
桶（Bucket）
哈希表的基本存储单元，每个桶对应一个哈希地址，可存放单个元素或冲突元素组成的链表 / 树。

开放地址法的代码实现

#pragma once
#include<vector>enum State
{EXIST,EMPTY,DELETE
};template<class K, class V>
struct HashData
{pair<K, V> _kv;State _state = EMPTY;
};template<class K>
struct HashFunc
{size_t operator()(const K& key){return (size_t)key;}
};template<>
struct HashFunc<string>
{size_t operator()(const string& s){// BKDRsize_t hash = 0;for (auto ch : s){hash += ch;hash *= 131;}return hash;}
};inline unsigned long __stl_next_prime(unsigned long n)
{// Note: assumes long is at least 32 bits.static const int __stl_num_primes = 28;static const unsigned long __stl_prime_list[__stl_num_primes] = {53, 97, 193, 389, 769,1543, 3079, 6151, 12289, 24593,49157, 98317, 196613, 393241, 786433,1572869, 3145739, 6291469, 12582917, 25165843,50331653, 100663319, 201326611, 402653189, 805306457,1610612741, 3221225473, 4294967291};const unsigned long* first = __stl_prime_list;const unsigned long* last = __stl_prime_list + __stl_num_primes;const unsigned long* pos = lower_bound(first, last, n);return pos == last ? *(last - 1) : *pos;
}namespace open_address
{template<class K, class V, class Hash = HashFunc<K>>class HashTable{public:HashTable():_tables(__stl_next_prime(0)), _n(0){}bool Insert(const pair<K, V>& kv){if (Find(kv.first))return false;// 负载因子 >= 0.7扩容if (_n * 10 / _tables.size() >= 7){//vector<HashData<K, V>> newtables(_tables.size()*2);//for (auto& data : _tables)//{//	// 旧表的数据映射到新表//	if (data._state == EXIST)//	{//		size_t hash0 = data._kv.first % newtables.size();//		// ...//	}//}//_tables.swap(newtables);HashTable<K, V, Hash> newht;//newht._tables.resize(_tables.size() * 2);newht._tables.resize(__stl_next_prime(_tables.size() + 1));for (auto& data : _tables){// 旧表的数据映射到新表if (data._state == EXIST){newht.Insert(data._kv);}}_tables.swap(newht._tables);}Hash hash;size_t hash0 = hash(kv.first) % _tables.size();size_t hashi = hash0;size_t i = 1;int flag = 1;while (_tables[hashi]._state == EXIST){// 线性探测hashi = (hash0 + i) % _tables.size();++i;/*hashi = (hash0 + (i*i*flag)) % _tables.size();if (hashi < _tables.size())hashi += _tables.size();if (flag == 1){flag = -1;}else{++i;flag = 1;}*/}_tables[hashi]._kv = kv;_tables[hashi]._state = EXIST;++_n;return true;}HashData<K, V>* Find(const K& key){Hash hash;size_t hash0 = hash(key) % _tables.size();size_t hashi = hash0;size_t i = 1;while (_tables[hashi]._state != EMPTY){if (_tables[hashi]._state == EXIST&& _tables[hashi]._kv.first == key){return &_tables[hashi];}// 线性探测hashi = (hash0 + i) % _tables.size();++i;}return nullptr;}bool Erase(const K& key){HashData<K, V>* ret = Find(key);if (ret){ret->_state = DELETE;return true;}else{return false;}}private:vector<HashData<K, V>> _tables;size_t _n;  // 记录数据个数};
}

1.2链式地址法

#pragma once
#include<vector>
#include<xutility>
using namespace std;
enum State
{EXIST,EMPTY,DELETE
};template<class K, class V>
struct HashData
{pair<K, V> _kv;State _state = EMPTY;
};template<class K>
struct HashFunc
{size_t operator()(const K& key){return (size_t)key;}
};template<>
struct HashFunc<string>
{size_t operator()(const string& s){// BKDRsize_t hash = 0;for (auto ch : s){hash += ch;hash *= 131;}return hash;}
};inline unsigned long __stl_next_prime(unsigned long n)
{// Note: assumes long is at least 32 bits.static const int __stl_num_primes = 28;static const unsigned long __stl_prime_list[__stl_num_primes] = {53, 97, 193, 389, 769,1543, 3079, 6151, 12289, 24593,49157, 98317, 196613, 393241, 786433,1572869, 3145739, 6291469, 12582917, 25165843,50331653, 100663319, 201326611, 402653189, 805306457,1610612741, 3221225473, 4294967291};const unsigned long* first = __stl_prime_list;const unsigned long* last = __stl_prime_list + __stl_num_primes;const unsigned long* pos = lower_bound(first, last, n);return pos == last ? *(last - 1) : *pos;
}namespace hash_bucket
{template<class K,class V>struct HashNode{pair<K, V> _kv;HashNode<K, V>* _next;HashNode(const pair<K,V>& kv):_kv(kv),_next(nullptr){}};template<class K,class V,class Hash=HashFunc<K>>class HashTable{typedef HashNode<K,V> Node;public:Hashtable(): _tables(11),_n(0){}bool Insert(const pair<K, V>& kv){if (_n == _tables.size()){vector<Node*> newht(__stl_next_prime(_tables.size()+1));for (int i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->next;size_t hashi = cur->kv.first % newth.size();cur->_next = newTatble[hashi];newTatble[hashi] = cur;cur = next;}_tables[i] = nullptr;}_tables.swap(newTatble);}size_t hashi = kv.first % _tables.size();// 头插Node* newnode = new Node(kv);newnode->_next = _tables[hashi];_tables[hashi] = newnode;++_n;return true;}private:vector<Node*> _tables; // 指针数组size_t _n = 0;		   // 表中存储数据个数};
}

2. 模拟实现 unordered_map 和 unordered_set 的要点分析

底层哈希表设计

unordered_map和unordered_set底层均依赖哈希表实现，区别在于存储的数据类型：

unordered_set存储单个键值（键即值）
unordered_map存储键值对（pair<Key, Value>）

核心设计要点：

桶数组：使用动态数组存储桶，每个桶为链表头指针（链地址法解决冲突）
节点结构：包含键（或键值对）和指向下一节点的指针
迭代器：需支持遍历桶内链表和跨桶移动，重载++、*、->等操作符

hash_backet的头文件

#pragma once
#include<vector>
#include<xutility>enum State
{EXIST,EMPTY,DELETE
};template<class K, class V>
struct HashData
{pair<K, V> _kv;State _state = EMPTY;
};template<class K>
struct HashFunc
{size_t operator()(const K& key){return (size_t)key;}
};template<>
struct HashFunc<string>
{size_t operator()(const string& s){// BKDRsize_t hash = 0;for (auto ch : s){hash += ch;hash *= 131;}return hash;}
};inline unsigned long __stl_next_prime(unsigned long n)
{// Note: assumes long is at least 32 bits.static const int __stl_num_primes = 28;static const unsigned long __stl_prime_list[__stl_num_primes] = {53, 97, 193, 389, 769,1543, 3079, 6151, 12289, 24593,49157, 98317, 196613, 393241, 786433,1572869, 3145739, 6291469, 12582917, 25165843,50331653, 100663319, 201326611, 402653189, 805306457,1610612741, 3221225473, 4294967291};const unsigned long* first = __stl_prime_list;const unsigned long* last = __stl_prime_list + __stl_num_primes;const unsigned long* pos = lower_bound(first, last, n);return pos == last ? *(last - 1) : *pos;
}namespace hash_bucket
{template<class T>struct HashNode{T _data;HashNode<T>* _next;HashNode(const T& data):_data(data),_next(nullptr){}};template<class K,class T,class Ref,class Ptr,class Hash,class KeyOfT>struct HTIterator{typedef HashNode<T> Node;typedef HashTables<K, T, KeyOfT, Hash> HT;typedef HTIterator<K, T, KeyOfT, Ref, Ptr, Hash> Self;Node* _node;const HT* _ht;HTIterator(Node* node, const HT* ht):_node(node), _ht(ht){}Ref operator*(){return _node->_data;}Ptr operator->(){return &_node->_data;}bool operator!=(const Self& s){return _node != s._node;}Self& operator++(){if (_node->_next){_node = _node->_next;}else{KeyOfT kot;Hash hash;size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();++hashi;while (hashi < _ht->_tables.size()){_node = _ht->_tables[hashi];if (_node)break;else++hashi;}if (hashi == _ht->_tables.size()){_node = nullptr;}}return this*}};template<class K,class T,class KeyOfT,class Hash>class HashTables{// 友元声明template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>friend struct HTIterator;typedef HashNode<T> Node;public:typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;Iterator brgin(){if (_n == 0)return End();for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];if (cur){return Iterator(cur, this);}}}Iterator End(){return Iterator(nullptr, this);}ConstIterator Begin() const{if (_n == 0)return End();for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];if (cur){return ConstIterator(cur, this);}}return End();}ConstIterator End() const{return ConstIterator(nullptr, this);}HashTable():_tables(__stl_next_prime(0)), _n(0){}// 拷贝构造和赋值重载也需要~HashTable(){for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->_next;delete cur;cur = next;}_tables[i] = nullptr;}}pair<Iterator, bool> Insert(const T& data){KeyOfT kot;Iterator it = Find(kot(data));if (it != End())return { it, false };Hash hash;// 负载因子 == 1时扩容if (_n == _tables.size()){vector<Node*> newTable(__stl_next_prime(_tables.size() + 1));for (size_t i = 0; i < _tables.size(); i++){Node* cur = _tables[i];while (cur){Node* next = cur->_next;// 头插到新表size_t hashi = hash(kot(cur->_data)) % newTable.size();cur->_next = newTable[hashi];newTable[hashi] = cur;cur = next;}_tables[i] = nullptr;}_tables.swap(newTable);}size_t hashi = hash(kot(data)) % _tables.size();// 头插Node* newnode = new Node(data);newnode->_next = _tables[hashi];_tables[hashi] = newnode;++_n;return { Iterator(newnode, this), false };}Iterator Find(const K& key){KeyOfT kot;Hash hash;size_t hashi = hash(key) % _tables.size();Node* cur = _tables[hashi];while (cur){if (kot(cur->_data) == key){return Iterator(cur, this);}cur = cur->_next;}return End();}bool Erase(const K& key){KeyOfT kot;size_t hashi = key % _tables.size();Node* prev = nullptr;Node* cur = _tables[hashi];while (cur){if (kot(cur->_data) == key){if (prev == nullptr){// 头结点_tables[hashi] = cur->_next;}else{// 中间节点prev->_next = cur->_next;}delete cur;--_n;return true;}else{prev = cur;cur = cur->_next;}}return false;}private:vector<Node*> _tables; // 指针数组size_t _n = 0;		   // 表中存储数据个数};
}

unordered_set的模拟实现

#pragma once
#include"hash_bucket.h"
namespace aaa
{template<class K, class Hash = HashFunc<K>>class unordered_set{struct SetKeyOfT{const K& operator()(const K& key){return key;}};public:typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::Iterator iterator;typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::ConstIterator const_iterator;iterator begin(){return _ht.Begin();}iterator end(){return _ht.End();}const_iterator begin() const{return _ht.Begin();}const_iterator end() const{return _ht.End();}pair<iterator, bool> insert(const K& key){return _ht.Insert(key);}iterator Find(const K& key){return _ht.Find(key);}bool Erase(const K& key){return _ht.Erase(key);}private:hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;};
}

unordered_map的模拟实现

#pragma once
#include"hash_bucket.h"
namespace aaa
{template<class K, class V, class Hash = HashFunc<K>>class unordered_map{struct MapKeyOfT{const K& operator()(const pair<K, V>& kv){return kv.first;}};public:typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::ConstIterator const_iterator;iterator begin(){return _ht.Begin();}iterator end(){return _ht.End();}const_iterator begin() const{return _ht.Begin();}const_iterator end() const{return _ht.End();}V& operator[](const K& key){pair<iterator, bool> ret = insert({ key, V() });return ret.first->second;}pair<iterator, bool> insert(const pair<K, V>& kv){return _ht.Insert(kv);}iterator Find(const K& key){return _ht.Find(key);}bool Erase(const K& key){return _ht.Erase(key);}private:hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;};
}

总的来说，unordered_map和unordered_set的模拟实现核心在于：