当前位置: 首页 > news >正文

模拟实现unordered_map

1.定义

unordered_map 是 C++ 标准库中的哈希表容器,特点是无序存储平均 O (1) 时间复杂度的插入 / 查找 / 删除操作。其核心原理是通过哈希函数将关键字映射到哈希桶(bucket),再通过链表或红黑树处理哈希冲突。

2.实现原理

1. 哈希表(Hash Table)
  • 作用:存储键值对的底层数据结构,由哈希桶数组组成。
  • 结构vector<Node*> 或 vector<list<Node>>,每个元素(桶)对应一个链表(处理哈希冲突)。
2. 哈希函数(Hash Function)
  • 作用:将关键字(Key)映射为哈希桶的索引(整数)。
  • 要求
    • 输出范围需在哈希桶数组大小范围内(通过取模 % bucket_count 实现)。
    • 尽可能减少哈希冲突(不同 Key 映射到同一索引)。

 例子:

template <typename Key>
size_t HashFunc(const Key& key) {// 对整数直接取哈希return static_cast<size_t>(key);
}// 对字符串的哈希
template <>
size_t HashFunc(const std::string& key) {size_t hash = 0;for (char c : key) {hash = hash * 31 + c; // 31 是质数,减少冲突}return hash;
}

3. 哈希冲突(Hash Collision)
  • 问题:不同的 Key 通过哈希函数得到相同的桶索引。
  • 解决方式
    • 链地址法(最常用):每个桶对应一个链表,冲突的键值对依次插入链表。
    • 开放定址法:冲突时寻找下一个空桶(较少用于 unordered_map)。
4. 负载因子(Load Factor)
  • 定义负载因子 = 元素个数 / 桶数量
  • 作用:衡量哈希表的拥挤程度,超过阈值(通常为 1.0)时需要扩容
  • 扩容机制
    • 创建新的、更大的桶数组(通常为原大小的 2 倍,且为质数)。
    • 将所有元素重新哈希(rehash)到新桶中,避免哈希冲突加剧。
5. 键值对(Key-Value Pair)
  • 结构:存储关键字(Key)和值(Value)的节点,例如:
template <typename Key, typename Value>
struct Node {Key key;Value value;Node* next; // 指向链表中下一个节点(处理冲突)Node(const Key& k, const Value& v) : key(k), value(v), next(nullptr) {}
};

3.主要实现

1. 构造函数与析构函数

template <typename Key, typename Value>
class my_UnorderedMap {
private:std::vector<Node<Key, Value>*> buckets; // 哈希桶数组size_t size; // 当前元素个数size_t bucket_count; // 桶数量const float load_factor_threshold = 1.0f; // 负载因子阈值public:// 构造函数:初始化桶数量(默认 10 个桶)my_UnorderedMap(size_t init_buckets = 10) : bucket_count(init_buckets), size(0) {buckets.resize(bucket_count, nullptr);}// 析构函数:释放所有节点和桶~my_UnorderedMap() {for (auto bucket : buckets) {Node<Key, Value>* cur = bucket;while (cur) {Node<Key, Value>* next = cur->next;delete cur;cur = next;}}}
};

2.插入操作

bool my_insert(const Key& key, const Value& value) {// 检查是否需要扩容if (size * 1.0 / bucket_count >= load_factor_threshold) {rehash(bucket_count * 2); // 扩容为原大小的 2 倍}// 计算哈希索引size_t index = HashFunc(key) % bucket_count;// 检查 Key 是否已存在(不允许重复 Key)Node<Key, Value>* cur = buckets[index];while (cur) {if (cur->key == key) {return false; // Key 已存在,插入失败}cur = cur->next;}// 插入新节点(头插法)Node<Key, Value>* new_node = new Node<Key, Value>(key, value);new_node->next = buckets[index]; // 新节点指向原链表头buckets[index] = new_node; // 桶头指向新节点size++;return true;
}

 3.查找

Value* my_find(const Key& key) {size_t index = HashFunc(key) % bucket_count;Node<Key, Value>* cur = buckets[index];while (cur) {if (cur->key == key) {return &(cur->value); // 找到 Key,返回值的指针}cur = cur->next;}return nullptr; // 未找到
}

 4.删除

bool my_erase(const Key& key) {size_t index = HashFunc(key) % bucket_count;Node<Key, Value>* cur = buckets[index];Node<Key, Value>* prev = nullptr;while (cur) {if (cur->key == key) {// 移除节点if (prev == nullptr) {// 头节点删除buckets[index] = cur->next;} else {prev->next = cur->next;}delete cur;size--;return true;}prev = cur;cur = cur->next;}return false; // 未找到 Key
}

 5.扩容

void my_rehash(size_t new_bucket_count) {if (new_bucket_count <= bucket_count) return; // 新桶数量必须更大// 创建新桶数组std::vector<Node<Key, Value>*> new_buckets(new_bucket_count, nullptr);// 将旧桶中的元素重新哈希到新桶for (size_t i = 0; i < bucket_count; i++) {Node<Key, Value>* cur = buckets[i];while (cur) {Node<Key, Value>* next = cur->next; // 保存下一个节点// 计算新索引size_t new_index = HashFunc(cur->key) % new_bucket_count;// 插入新桶(头插法)cur->next = new_buckets[new_index];new_buckets[new_index] = cur;cur = next;}}// 替换旧桶数组buckets.swap(new_buckets);bucket_count = new_bucket_count;
}

6.移动复制和拷贝赋值

// 拷贝赋值运算符my_UnorderedMap& operator=(const UnorderedMap& other) {if (this != &other) {clear();bucket_count = other.bucket_count;load_factor_threshold = other.load_factor_threshold;hash_fn = other.hash_fn;equal_fn = other.equal_fn;buckets.resize(bucket_count, nullptr);for (size_t i = 0; i < other.bucket_count; ++i) {Node* current = other.buckets[i];while (current != nullptr) {insert(current->key, current->value);current = current->next;}}}return *this;}// 移动赋值运算符my_UnorderedMap& operator=(UnorderedMap&& other) noexcept {if (this != &other) {clear();buckets = std::move(other.buckets);element_count = other.element_count;bucket_count = other.bucket_count;load_factor_threshold = other.load_factor_threshold;hash_fn = std::move(other.hash_fn);equal_fn = std::move(other.equal_fn);other.element_count = 0;other.bucket_count = 0;}return *this;}

---完整代码;

#include <vector>
#include <cstddef>
#include <functional>
#include <stdexcept>template <typename Key, typename Value, typename Hash = std::hash<Key>, typename Equal = std::equal_to<Key>>
class my_UnorderedMap {
private:// 键值对节点结构struct Node {Key key;Value value;Node* next;Node(const Key& k, const Value& v) : key(k), value(v), next(nullptr) {}};std::vector<Node*> buckets;  // 哈希桶数组size_t element_count;        // 当前元素数量size_t bucket_count;         // 桶的数量float load_factor_threshold; // 负载因子阈值// 哈希函数包装器Hash hash_fn;// 相等比较器Equal equal_fn;// 检查是否为质数bool is_prime(size_t num) const {if (num <= 1) return false;if (num == 2) return true;if (num % 2 == 0) return false;for (size_t i = 3; i * i <= num; i += 2) {if (num % i == 0) return false;}return true;}// 获取下一个质数size_t next_prime(size_t num) const {if (num <= 2) return 2;size_t candidate = num;if (candidate % 2 == 0) ++candidate;while (!is_prime(candidate)) {candidate += 2;}return candidate;}// 重新哈希所有元素到新的桶数组void my_rehash(size_t new_bucket_count) {if (new_bucket_count <= bucket_count) return;std::vector<Node*> new_buckets(new_bucket_count, nullptr);// 将旧桶中的所有节点迁移到新桶for (size_t i = 0; i < bucket_count; ++i) {Node* current = buckets[i];while (current != nullptr) {Node* next = current->next;size_t new_index = hash_fn(current->key) % new_bucket_count;current->next = new_buckets[new_index];new_buckets[new_index] = current;current = next;}}// 交换新旧桶数组buckets.swap(new_buckets);bucket_count = new_bucket_count;}public:// 构造函数explicit my_UnorderedMap(size_t initial_buckets = 10, const Hash& hash = Hash(), const Equal& equal = Equal()): element_count(0), bucket_count(next_prime(initial_buckets)),load_factor_threshold(1.0f),hash_fn(hash),equal_fn(equal) {buckets.resize(bucket_count, nullptr);}// 析构函数~my_UnorderedMap() {clear();}// 拷贝构造函数my_UnorderedMap(const UnorderedMap& other): element_count(0), bucket_count(other.bucket_count),load_factor_threshold(other.load_factor_threshold),hash_fn(other.hash_fn),equal_fn(other.equal_fn) {buckets.resize(bucket_count, nullptr);for (size_t i = 0; i < other.bucket_count; ++i) {Node* current = other.buckets[i];while (current != nullptr) {insert(current->key, current->value);current = current->next;}}}// 移动构造函数my_UnorderedMap(UnorderedMap&& other) noexcept: buckets(std::move(other.buckets)),element_count(other.element_count),bucket_count(other.bucket_count),load_factor_threshold(other.load_factor_threshold),hash_fn(std::move(other.hash_fn)),equal_fn(std::move(other.equal_fn)) {other.element_count = 0;other.bucket_count = 0;}// 拷贝赋值运算符my_UnorderedMap& operator=(const UnorderedMap& other) {if (this != &other) {clear();bucket_count = other.bucket_count;load_factor_threshold = other.load_factor_threshold;hash_fn = other.hash_fn;equal_fn = other.equal_fn;buckets.resize(bucket_count, nullptr);for (size_t i = 0; i < other.bucket_count; ++i) {Node* current = other.buckets[i];while (current != nullptr) {insert(current->key, current->value);current = current->next;}}}return *this;}// 移动赋值运算符my_UnorderedMap& operator=(UnorderedMap&& other) noexcept {if (this != &other) {clear();buckets = std::move(other.buckets);element_count = other.element_count;bucket_count = other.bucket_count;load_factor_threshold = other.load_factor_threshold;hash_fn = std::move(other.hash_fn);equal_fn = std::move(other.equal_fn);other.element_count = 0;other.bucket_count = 0;}return *this;}// 插入元素bool my_insert(const Key& key, const Value& value) {// 检查负载因子,必要时扩容if (static_cast<float>(element_count) / bucket_count >= load_factor_threshold) {rehash(next_prime(bucket_count * 2));}size_t index = hash_fn(key) % bucket_count;Node* current = buckets[index];// 检查键是否已存在while (current != nullptr) {if (equal_fn(current->key, key)) {return false; // 键已存在,插入失败}current = current->next;}// 创建新节点并插入链表头部Node* new_node = new Node(key, value);new_node->next = buckets[index];buckets[index] = new_node;++element_count;return true;}// 查找元素Value* my_find(const Key& key) {size_t index = hash_fn(key) % bucket_count;Node* current = buckets[index];while (current != nullptr) {if (equal_fn(current->key, key)) {return &(current->value);}current = current->next;}return nullptr; // 未找到}// 查找元素(const 版本)const Value* my_find(const Key& key) const {size_t index = hash_fn(key) % bucket_count;Node* current = buckets[index];while (current != nullptr) {if (equal_fn(current->key, key)) {return &(current->value);}current = current->next;}return nullptr; // 未找到}// 重载[]运算符Value& operator[](const Key& key) {Value* value_ptr = find(key);if (value_ptr == nullptr) {// 键不存在,插入默认值insert(key, Value());value_ptr = find(key);}return *value_ptr;}// 删除元素bool my_erase(const Key& key) {size_t index = hash_fn(key) % bucket_count;Node* current = buckets[index];Node* previous = nullptr;while (current != nullptr) {if (equal_fn(current->key, key)) {if (previous == nullptr) {// 删除头节点buckets[index] = current->next;} else {previous->next = current->next;}delete current;--element_count;return true;}previous = current;current = current->next;}return false; // 未找到键}// 获取元素数量size_t size() const {return element_count;}// 检查是否为空bool my_empty() const {return element_count == 0;}// 清空容器void clear() {for (size_t i = 0; i < bucket_count; ++i) {Node* current = buckets[i];while (current != nullptr) {Node* next = current->next;delete current;current = next;}buckets[i] = nullptr;}element_count = 0;}// 获取负载因子float load_factor() const {return static_cast<float>(element_count) / bucket_count;}// 获取负载因子阈值float max_load_factor() const {return load_factor_threshold;}// 设置负载因子阈值void max_load_factor(float new_threshold) {if (new_threshold > 0) {load_factor_threshold = new_threshold;}}// 调整桶的数量void reserve(size_t count) {size_t new_bucket_count = next_prime(count);if (new_bucket_count > bucket_count) {rehash(new_bucket_count);}}// 迭代器类class Iterator {private:Node* current;const std::vector<Node*>* buckets;size_t bucket_index;size_t bucket_count;// 移动到下一个非空桶void move_to_next_bucket() {while (current == nullptr && bucket_index < bucket_count) {++bucket_index;if (bucket_index < bucket_count) {current = (*buckets)[bucket_index];}}}public:Iterator(Node* node, const std::vector<Node*>* b, size_t index, size_t count): current(node), buckets(b), bucket_index(index), bucket_count(count) {if (current == nullptr) {move_to_next_bucket();}}Iterator& operator++() {if (current != nullptr) {current = current->next;if (current == nullptr) {++bucket_index;move_to_next_bucket();}}return *this;}bool operator==(const Iterator& other) const {return current == other.current;}bool operator!=(const Iterator& other) const {return !(*this == other);}std::pair<const Key&, Value&> operator*() const {return {current->key, current->value};}std::pair<const Key&, Value&>* operator->() const {// 这里简化实现,实际需要返回一个代理对象throw std::runtime_error("Arrow operator not fully implemented");}};// 迭代器接口Iterator begin() const {if (empty()) {return end();}return Iterator(buckets[0], &buckets, 0, bucket_count);}Iterator end() const {return Iterator(nullptr, &buckets, bucket_count, bucket_count);}
};

http://www.lryc.cn/news/584838.html

相关文章:

  • 如何使用 Python 删除 Excel 中的行、列和单元格 – 详解
  • 如何从0开始构建自己的第一个AI应用?(Prompt工程、Agent自定义、Tuning)
  • 格密码--数学基础--02基变换、幺模矩阵与 Hermite 标准形
  • AI金融风控:识别欺诈,量化风险的新利器
  • pandas销售数据分析
  • python 在 Linux CentOS 上安装 playwright 的完整步骤
  • Pandas:常见的转换函数(rename,set_index,reset_index)
  • MD2Doc转换器(基于Python)
  • [Reverse1] Tales of the Arrow
  • 飞算 JavaAI 深度体验:开启 Java 开发智能化新纪元
  • 闲庭信步使用图像验证平台加速FPGA的开发:第八课——图像数据的行缓存
  • Locust 负载测试工具使用教程
  • 为什么选择Selenium自动化测试?
  • [特殊字符]远程服务器配置pytorch环境
  • ajax和XMLHttpRequest以及fetch
  • STM32-DAC数模转换
  • day21——特殊文件:XML、Properties、以及日志框架
  • C#元组:从基础到实战的全方位解析
  • 实现在线预览pdf功能,后台下载PDF
  • 使用gdal读取shp及filegdb文件
  • 通过ETL工具,高效完成达梦数据库数据同步至数仓Oracle的具体实现
  • Primer Premier 5分子生物学引物设计软件 PCR引物设计工具
  • Swift 解 LeetCode 324:一步步实现摆动排序 II,掌握数组重排的节奏感
  • 智能文本抽取在合同管理实战应用
  • P1484 种树,特殊情形下的 WQS 二分转化。
  • 【9】PostgreSQL 之 vacuum 死元组清理
  • 从语音识别到智能助手:Voice Agent 的技术进化与交互变革丨Voice Agent 学习笔记
  • 如何将 iPhone 文件传到 Mac?
  • 模型训练的常用方法及llama-factory支持的数据训练格式
  • 微服务引擎 MSE 及云原生 API 网关 2025 年 6 月产品动态