当前位置：首页 > news >正文

从O(k*n)到O(1)：如何用哈希表终结多层if判断的性能困局

news 2025/9/15 7:16:37

【前言】
本文将以哈希表重构实战为核心，完整展示如何将传统条件匹配逻辑(上千层if-else判断)转化为O(1)的哈希表高效实现。通过指纹验证场景的代码级解剖，您将深入理解：
1.哈希函数设计如何规避冲突陷阱
2.链式寻址法的工程实现细节
若有出错之处，望各位大哥大姐指出(●’◡’●)

Ⅰ 背景

最近，拿到一个场景，有一个研判规则中，需要一次匹配上千个以上规则的规则，一开始采用的是多层if判断，但是这种在高频事件中，明显性能遭不住，而且在研判速度上远远达不到预期

最初代码如下

bool is_finger(char finger[]){if (strlen(finger) == yyy){return 0;}if (strlen(finger) == xxx){return 0;}//………………还有几千个规则研判
}

【目标】将程序的时间复杂度O(k*n)，降至O(1)
【实现】可以采用两种，一是哈希表，二是字典树

Ⅱ C程序优化实践

说那么多，没啥用，直接实操，冲冲冲
先定义下变量和结构体


#define HASH_SIZE 1024  // 哈希表大小，应该是质数以减少冲突typedef struct HashNode {char* key;struct HashNode* next;  // 处理冲突用的链表
} HashNode;typedef struct {HashNode* table[HASH_SIZE];
} HashMap;

初始化哈希表


// 哈希函数
unsigned int hash(const char* str) {unsigned int hash = 5381;int c;while ((c = *str++)) {hash = ((hash << 5) + hash) + c; // hash * 33 + c}return hash % HASH_SIZE;
}
// 初始化哈希表
HashMap* init_fingerprint_map() {HashMap* map = (HashMap*)malloc(sizeof(HashMap));memset(map->table, 0, sizeof(HashNode*) * HASH_SIZE);// 需要过滤的指纹列表const char* fingerprints[] = {"En", "nTf.n", "kno:n", "n)on", "fknn","kn", "n&n", "nn", "n&nn", "Ton",};// 插入所有指纹for (int i = 0; i < sizeof(fingerprints)/sizeof(char*); i++) {unsigned int index = hash(fingerprints[i]);HashNode* node = (HashNode*)malloc(sizeof(HashNode));node->key = strdup(fingerprints[i]);node->next = map->table[index];map->table[index] = node;}return map;
}

关键实现，哈希查找

// 查找函数 - O(1) 平均时间复杂度
bool is_fingerprint(HashMap* map, const char* fingerprint) {unsigned int index = hash(fingerprint);HashNode* current = map->table[index];// 在链表中查找while (current != NULL) {if (strcmp(current->key, fingerprint) == 0) {return false;  // 找到匹配项，返回false}current = current->next;}return true;  // 未找到匹配项
}

记得要释放内存

// 释放哈希表内存
void free_hashmap(HashMap* map) {for (int i = 0; i < HASH_SIZE; i++) {HashNode* current = map->table[i];while (current != NULL) {HashNode* temp = current;current = current->next;free(temp->key);free(temp);}}free(map);
}

完整代码

#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>#define HASH_SIZE 1024  // 哈希表大小，应该是质数以减少冲突typedef struct HashNode {char* key;struct HashNode* next;  // 处理冲突用的链表
} HashNode;typedef struct {HashNode* table[HASH_SIZE];
} HashMap;// 哈希函数
unsigned int hash(const char* str) {unsigned int hash = 5381;int c;while ((c = *str++)) {hash = ((hash << 5) + hash) + c; // hash * 33 + c}return hash % HASH_SIZE;
}// 初始化哈希表
HashMap* init_fingerprint_map() {HashMap* map = (HashMap*)malloc(sizeof(HashMap));memset(map->table, 0, sizeof(HashNode*) * HASH_SIZE);// 需要过滤的指纹列表const char* fingerprints[] = {"En", "nTf.n", "kno:n", "n)on", "fknn","kn", "n&n", "nn", "n&nn", "Ton",};// 插入所有指纹for (int i = 0; i < sizeof(fingerprints)/sizeof(char*); i++) {unsigned int index = hash(fingerprints[i]);HashNode* node = (HashNode*)malloc(sizeof(HashNode));node->key = strdup(fingerprints[i]);node->next = map->table[index];map->table[index] = node;}return map;
}// 查找函数 - O(1) 平均时间复杂度
bool is_fingerprint(HashMap* map, const char* fingerprint) {unsigned int index = hash(fingerprint);HashNode* current = map->table[index];// 在链表中查找while (current != NULL) {if (strcmp(current->key, fingerprint) == 0) {return false;  // 找到匹配项，返回false}current = current->next;}return true;  // 未找到匹配项
}// 释放哈希表内存
void free_hashmap(HashMap* map) {for (int i = 0; i < HASH_SIZE; i++) {HashNode* current = map->table[i];while (current != NULL) {HashNode* temp = current;current = current->next;free(temp->key);free(temp);}}free(map);
}
int main() {// 初始化（只需要一次）HashMap* map = init_fingerprint_map();// 快速查找并打印结果bool result1 = is_fingerprint(map, "En");printf("Test 'En': %s\n", result1 ? "true" : "false");bool result2 = is_fingerprint(map, "kn");printf("Test 'kn': %s\n", result2 ? "true" : "false");bool result3 = is_fingerprint(map, "other");printf("Test 'other': %s\n", result3 ? "true" : "false");// 清理资源free_hashmap(map);return 0;
}

Ⅲ 深度解析哈希表为啥能O(1)

1. 先了解下什么是哈希表？

想象你有一个带编号的储物柜（这就是哈希表）：
在这里插入图片描述

哈希函数就像一个规则，告诉你把东西放在哪个柜子里
比如：把字符串 “hello” 放到 3 号柜子

回到一开始说的"为什么说查找是 O(1)"!
当你要找 “hello” 时：

用哈希函数算出位置：3
我们直接去 3 号柜子找
这样子，是不是就不需要查看其他柜子了，直接O(1)，起飞芜湖~~

2. 哈希冲突到底是什么

了解了什么是哈希表，那开始熟悉下哈希冲突
假设现在：

“hello” -> 3号柜子
“world” -> 也是3号柜子

处理冲突的方式:储物柜用链子连接

好了，了解了基本逻辑，基本可以上C代码

// 假设我们有一个小型哈希表，存储常见编程语言
#define HASH_SIZE 8  // 8个储物柜// 存储数据
hash("Python") -> 3
hash("Java") -> 3    // 冲突！
hash("Go") -> 5储物柜：
0    1    2    3          4    5     6    7
[  ] [  ] [  ] [Python]-> [Java] [Go] [  ] [  ]// 查找"Java"的过程
1. hash("Java") = 3           // 计算位置
2. 检查3号柜子的 Python      // 不是
3. 检查下一个 Java          // 找到了！

3.哈希表为什么快

想象一个真实的哈希表

#define HASH_SIZE 1024  // 1024个储物柜// 如果存100个数据
// 平均每个柜子只会有 100/1024 ≈ 0.1 个数据
// 也就是说，大多数柜子是空的！

联想实际场景：图书馆找书

不需要从第一本找到最后一本
直接根据编号去对应书架
即使这个位置有几本书，也只需要看很少几本

这样子，就是不是很清晰了，其实哈希表，就是拿着key，拿到索引，然后去对应柜子找东西
按照这个思路，来解读下刚刚写的哈希查找代码

bool is_fingerprint(HashMap* map, const char* fingerprint) {// 1. 计算应该去哪个储物柜unsigned int index = hash(fingerprint);// 2. 去到那个储物柜HashNode* current = map->table[index];// 3. 如果这个储物柜有多个物品，挨个检查while (current != NULL) {// 4. 检查是不是要找的东西if (strcmp(current->key, fingerprint) == 0) {return false;  // 找到了！}current = current->next;  // 看下一个}return true;  // 没找到
}