当前位置：首页 > article >正文

202403-02-相似度计算 csp认证

article 2025/8/27 0:49:05

在这里插入图片描述

其实这个问题就是求两篇文章词汇的交集和并集（单词不区分大小写）。一说到并集，我首先想到的就是 set 这种集合数据结构，因为 set 中的元素是唯一的。
STL之set的基本使用–博客参考

所以将两篇文章的词汇全部加入同一个 set 中，再求出这个 set 的大小，即为并集的大小。

#include <iostream>
#include <string>
#include <set>
using namespace std;

/**
 * Converts every ASCII lowercase letter ('a'..'z') of `str` to uppercase,
 * in place. All other characters are left untouched.
 *
 * @param str the word to normalise; modified in place.
 */
void toupper(string &str)
{
    for (char &ch : str)
    {
        if (ch >= 'a' && ch <= 'z')
        {
            ch = static_cast<char>(ch - ('a' - 'A'));
        }
    }
}

/**
 * Reads two word counts `n` and `m`, then `n` words of the first article and
 * `m` words of the second, and prints the number of distinct words common to
 * both articles and the number of distinct words in either one. Words are
 * compared case-insensitively (they are upper-cased before being stored).
 */
int main()
{
    // Zero-initialised so that a failed read does not leave the loop bounds
    // indeterminate.
    int n = 0, m = 0;
    cin >> n >> m;

    string word;
    set<string> first_set; // words of the first article not yet matched by the second
    set<string> union_set; // every distinct word seen in either article

    // Read the first article.
    for (int i = 0; i < n; i++)
    {
        cin >> word;
        toupper(word);
        first_set.insert(word);
        union_set.insert(word);
    }

    int intersection = 0; // number of distinct words present in both articles

    // Read the second article.
    for (int i = 0; i < m; i++)
    {
        cin >> word;
        toupper(word);

        // erase(key) returns how many elements were removed, so one call both
        // tests for membership and removes the word. Removing it guarantees a
        // word repeated in the second article is only counted once.
        if (first_set.erase(word) > 0)
        {
            intersection++;
        }

        union_set.insert(word);
    }

    // NOTE(review): the other solutions in this article print bare numbers;
    // the labels below may need to be dropped before submitting to a judge.
    cout << "交集数量: " << intersection << endl;
    cout << "并集数量: " << union_set.size() << endl;

    // The original system("pause") was debugging-only (and had no <cstdlib>
    // include), so it has been removed.
    return 0;
}

但是我一开始选用的是 unordered_map……我也不知道为什么。

#include <iostream>
#include <string>
#include <unordered_map>
using namespace std;
/**
 * Converts every ASCII lowercase letter ('a'..'z') of `str` to uppercase,
 * in place. All other characters are left untouched.
 *
 * @param str the word to normalise; modified in place.
 */
void toUpperCase(std::string &str)
{
    for (char &ch : str)
    {
        if (ch >= 'a' && ch <= 'z')
        {
            ch = static_cast<char>(ch - ('a' - 'A'));
        }
    }
}

/**
 * Debug helper: prints a separator line followed by one "key value" line per
 * entry of the map, in the map's own iteration order.
 *
 * @param word the map to dump to standard output.
 */
void PrintMap(const std::unordered_map<std::string, int>& word)
{
    std::cout << "------------------" << std::endl;
    // Bind by const reference so each key/value pair is not copied.
    for (const auto &entry : word)
    {
        std::cout << entry.first << " " << entry.second << std::endl;
    }
}
int main()
{int n, m; // 两篇文章的单词个数cin >> n >> m;string word;unordered_map<string, int> nword;unordered_map<string, int> mword;unordered_map<string, int> mixed;for (int i = 0; i < n; i++){cin >> word;toUpperCase(word);nword[word] = 1;mixed[word]++;}int sum = 0; // 并集数量 for (int i = 0; i < m; i++){cin >> word;toUpperCase(word);mixed[word]++;if(nword[word] > 0 && mword[word] == 0){ // 如果在第一篇文章已经存在 而且是第二篇文章第一次读取到 sum++;}mword[word] = 1;}PrintMap(nword), PrintMap(mword);PrintMap(mixed);cout << sum << endl;cout << mixed.size() << endl;system("pause");return 0;
}//更加节省空间的方法 
#include <iostream>
#include <string>
#include <unordered_map>
using namespace std;
/**
 * Converts every ASCII lowercase letter ('a'..'z') of `str` to uppercase,
 * in place. All other characters are left untouched.
 *
 * @param str the word to normalise; modified in place.
 */
void toUpperCase(std::string &str)
{
    // Range-for avoids comparing a signed index against the unsigned size().
    for (char &ch : str)
    {
        if (ch >= 'a' && ch <= 'z')
        {
            ch = static_cast<char>(ch - ('a' - 'A'));
        }
    }
}

/**
 * Debug helper: prints a separator line followed by one "key value" line per
 * entry of the map, in the map's own iteration order.
 *
 * @param word the map to dump to standard output.
 */
void PrintMap(const std::unordered_map<std::string, int>& word)
{
    std::cout << "------------------" << std::endl;
    // Bind by const reference so each key/value pair is not copied.
    for (const auto &entry : word)
    {
        std::cout << entry.first << " " << entry.second << std::endl;
    }
}
int main()
{int n, m; // 两篇文章的单词个数cin >> n >> m;string word;    unordered_map<string, int> mixed;for (int i = 0; i < n; i++){cin >> word;toUpperCase(word);mixed[word] = 1; // 表示在两篇文章中第一篇出现 }int sum = 0; // 并集数量 for (int i = 0; i < m; i++){cin >> word;toUpperCase(word);if(mixed[word] == 1){sum++;mixed[word] = 2; // 表现在第二篇出现 } }PrintMap(mixed);cout << sum << endl;cout << mixed.size() << endl;return 0;
}