编译技术实验三之编译器的构造和设计
一、实验目的:
我们将设计多个不同的综合实验项目提供给学生选择。(如:LL(1)文法自动生成语法分析程序的设计;单词的自动识别与智能纠错;语言的程序编辑器;数学计算式的识别等)学生可在这些项目中选择1个项目,体现了实验技术的全面性和先进性。综合实验将以编译原理、技术为基础,结合前期课程,加上实际需求和学生自己的设计才能完成。这些前期课程接口包括离散数学、数据结构、C++程序设计语言、操作系统、汇编语言、数据库原理等。通过综合实验使学生设计制作出有一定水平的编译程序,使学生更好地掌握编译原理和基本技术
设计一个小型编译器。
二、实验内容及原理
以高级程序设计语言的编译原理和基本技术,加上实际编辑程序过程的需求,把综合实验项目分层次展开,经过小组学生的分工与合作加上创意才能完成。
设计的语言上可以使用编译性语言,也可以使用解释性语言。实验的结果是一个编译器或一个综合编译技术的应用软件。
三、实验过程原始数据记录
主要结构:
char terminal[50]; //终结符
set<string>Noter; //非终结符
vector<string>nt;
vector<string>t;
int tNum;//终结符个数
int nNum;//非终结符个数
string Production[50] = { "" };//产生式
string ReProduction[50] = { "" };//新产生式
int ProNum;//产生式个数
int ReProNum = 0;//新产生式个数
vector<pair<string, string>>I;//所有项目
vector<pair<string, string>>closure;//项目的闭包
vector<pair<string, string>>project;
vector<pair<string, string>>Start;
map<pair<int, string>, int>table;
string str;
unordered_map<int, vector<pair<int, string>>>LRtable;
unordered_map<int, vector<pair<string, int>>>state;
string lrtable[100][100] = { "-1" };
vector<int>sq;//状态栈
vector<char>inputStr, sign;//全体字符以及符号栈
主要函数:
void Gramma()
void GrammaF()
void get_project()
vector<pair<string, string>> get_closure(const vector<pair<string, string>>& pro)
vector<pair<string, string>> Goto(const vector<pair<string, string>>& pro, const string& flag)
vector<vector<pair<string, string>>> get_all_closure(const vector<pair<string, string>>& pro)
int get_num(string s)
int getChar(char ch)
int get_rnext(string s)
void get_LRTable(vector<vector<pair<string, string>>>allclosure)
void init()
string outPut(int i)
void check()
void analyst()
源代码:
#include<iostream>
#include<vector>
#include<string>
#include<set>
#include<unordered_map>
#include<iomanip>
#include<map>
using namespace std;char terminal[50]; //终结符
set<string>Noter; //非终结符
vector<string>nt;
vector<string>t;
int tNum; //终结符个数
int nNum; //非终结符个数
string Production[50] = { "" }; //产生式
string ReProduction[50] = { "" }; //新产生式
int ProNum; //产生式个数
int ReProNum = 0; //新产生式个数
vector<pair<string, string>>I; //所有项目
vector<pair<string, string>>closure; //项目闭包
vector<pair<string, string>>project;
vector<pair<string, string>>Start;
map<pair<int, string>, int>table;
string str;
unordered_map<int, vector<pair<int, string>>>LRtable;
string lrtable[100][100] = { "-1" };
unordered_map<int, vector<pair<string, int>>>state;
vector<int>sq;//状态栈
vector<char>inputStr, sign;//全体字符以及符号栈void Gramma() {cout << "--------------------------" << endl;cout << "请输入终结符的个数:";cin >> tNum;cout << "请输入终结符:" << endl;for (int i = 0; i < tNum; i++) {cin >> terminal[i];string st;st.push_back(terminal[i]);t.push_back(st);}cout << "请输入非终结符的个数:";cin >> nNum;cout << "请输入非终结符:" << endl;for (int i = 0; i < nNum; i++) {string s;cin >> s;Noter.insert(s);nt.push_back(s);t.push_back(s);}cout << "请输入产生式的个数:";cin >> ProNum;cout << "请输入产生式:" << endl;for (int i = 0; i < ProNum; i++) {cin >> Production[i];}
}void GrammaF() {int x = 0, z = 1;//x为第z个产生式的第x个字符for (int i = 0; i < ProNum; i++) {x = 3;int k = 0;while (x < Production[i].size()) {for (k = 0; k < 3; k++) {//获取产生式的左部以及箭头ReProduction[z] += Production[i][k];}while (x < Production[i].size() && Production[i][x] != '|') {ReProduction[z] += Production[i][x];x++;}z++;x++;}}string s = ReProduction[1].substr(0, 1) + "'";str = s;ReProduction[0] = s + "->" + ReProduction[1].substr(0, 1);t.push_back(s);Noter.insert(s);nt.push_back(s);nNum++;ReProNum = z;project.push_back({ s,ReProduction[1].substr(0, 1) });for (int i = 1; i < ReProNum; i++) {string fir = ReProduction[i].substr(0, 1);string sec = ReProduction[i].substr(3);project.push_back({ fir,sec });}
}void get_project() {I.push_back({ ReProduction[0].substr(0,2),"." + ReProduction[0].substr(4) });Start.push_back({ ReProduction[0].substr(0,2),"." + ReProduction[0].substr(4) });I.push_back({ ReProduction[0].substr(0,2),ReProduction[0].substr(4) + "." });for (int i = 1; i < ReProNum; i++) {string First = "";First += ReProduction[i][0];string Second = ReProduction[i].substr(3);for (int j = 0; j <= Second.size(); j++) {string tmp = Second;tmp.insert(j, ".");I.push_back({ First,tmp });}}for (int i = 0; i < I.size(); i++) {cout << I[i].first << " " << I[i].second << endl;}
}vector<pair<string, string>> get_closure(const vector<pair<string, string>>& pro) {vector<pair<string, string>>closure = pro;auto Size = closure.size();string first;for (int i = 0; i < Size; i++) {auto pos = closure[i].second.find(".");if (pos == string::npos) {cout << "error" << endl;cout << closure[i].first << "-->>" << closure[i].second << endl;return closure;}auto c = closure[i].second[pos + 1];first.push_back(c);if (Noter.find(first) == Noter.end()) {first.clear();continue;}for (int i = 0; i < project.size(); i++) {if (first == project[i].first) {auto second = project[i].second;second.insert(0, ".");bool flag = 0;for (int j = 0; j < Size; j++) {if (second == closure[j].second) {flag = 1;break;}}if (flag == 0) {closure.push_back({ first,second });}}}first.clear();Size = closure.size();}return closure;
}vector<pair<string, string>> Goto(const vector<pair<string, string>>& pro, const string& f) {vector<pair<string, string>>nextJ;for (int i = 0; i < pro.size(); i++) {auto p = pro[i].second.find(".");if (p == string::npos) {cout << "error" << endl;return nextJ;}else {auto ch = pro[i].second[p + 1];if (ch == '\0')continue;string temp;temp.push_back(ch);if (temp != f)continue;else{auto first = pro[i].first;auto second = pro[i].second;second[p] = second[p + 1];second[p + 1] = '.';nextJ.push_back({ first, second });}}}auto T = nextJ;nextJ = get_closure(T);return nextJ;
}vector<vector<pair<string, string>>> get_all_closure(const vector<pair<string, string>>& pro) {vector<vector<pair<string, string>>>allclosure;allclosure.push_back(pro);auto size = allclosure.size();for (int i = 0; i < size; i++) {set<string>flag;auto len = allclosure[i].size();for (int j = 0; j < len; j++) {auto pos = allclosure[i][j].second.find(".");if (pos == string::npos) {cout << "error" << endl;return allclosure;}else {auto ch = allclosure[i][j].second[pos + 1];if (ch == '\0') {continue;}string str;str.push_back(ch);flag.insert(str);}}for (set<string>::iterator it = flag.begin(); it != flag.end(); it++) {auto closure = Goto(allclosure[i], *it);if (closure.size() == 0)continue;else {bool stop = 0;for (int j = 0; j < size; j++) {if (allclosure[j] == closure) {table.insert({ {i,*it},j });stop = 1;break;}}if (stop == 1) {continue;}allclosure.push_back(closure);int sz = allclosure.size() - 1;table.insert({ {i,*it}, sz });}}size = allclosure.size();}return allclosure;
}int get_num(string s) {for (int i = 0; i < t.size(); i++) {if (t[i] == s) {return i;}}return -1;
}int get_rnext(string s) {for (int i = 0; i < project.size(); i++) {if (s == project[i].second) {return i;}}return -1;
}void get_LRTable(vector<vector<pair<string, string>>>allclosure) {for (int i = 0; i < allclosure.size(); i++) {for (int j = 0; j < t.size(); j++) {lrtable[i][j] = "-1";}}vector<pair<int, int>>nums;bool st[100] = { false };int acc;for (map<pair<int, string>, int>::iterator it = table.begin(); it != table.end(); it++){int from = it->first.first;int next = it->second;string s = it->first.second;auto Inext = allclosure[next];auto Ifrom = allclosure[from];lrtable[from][get_num(s)] = "s" + to_string(next);for (int j = 0; j < Ifrom.size(); j++) {auto pos = Ifrom[j].second.find(".");if (pos == Ifrom[j].second.size() - 1 && Ifrom[j].first != str) {int rnext = get_rnext(Ifrom[j].second.substr(0, Ifrom[j].second.size() - 1));nums.push_back({ from,rnext });}if (pos == Ifrom[j].second.size() - 1 && Ifrom[j].first == str) {acc = from;}}st[from] = true;}lrtable[acc][get_num("#")] = "acc";for (int i = 0; i < allclosure.size(); i++) {if (!st[i]) {for (int j = 0; j < allclosure[i].size(); j++) {auto pos = allclosure[i][j].second.find(".");if (pos == allclosure[i][j].second.size() - 1) {int rnext = get_rnext(allclosure[i][j].second.substr(0, allclosure[i][j].second.size() - 1));nums.push_back({ i,rnext });}}}}for (int i = 0; i < nums.size(); i++) {int f = nums[i].first, rnext = nums[i].second;for (int j = 0; j < tNum; j++) {lrtable[f][j] = "r" + to_string(rnext);}}for (int i = 0; i < allclosure.size(); i++) {for (int j = 0; j < t.size(); j++) {if (lrtable[i][j] != "-1") {LRtable[i].push_back({ j,lrtable[i][j] });}}}
}int getChar(char ch) {string s;s.push_back(ch);for (int i = 0; i < t.size(); i++) {if (t[i] == s)return i;}return -1;
}
void init() {for (map<pair<int, string>, int>::iterator it = table.begin(); it != table.end(); it++){state[it->first.first].push_back({ it->first.second,it->second });}
}string outPut(int i) {char buf[100];int count = 0;if (i == 0) {string str;for (vector<int>::iterator it = sq.begin(); it != sq.end(); it++) {int c = *it;str += to_string(c);}return str;}else if (i == 1) {for (vector<char>::iterator it = sign.begin(); it != sign.end(); it++) {buf[count++] = *it;}}else {for (vector<char>::iterator it = inputStr.begin(); it != inputStr.end(); it++) {buf[count++] = *it;}}buf[count] = '\0';string str(buf);return str;
}
void check() {int step = 1;sign.push_back('#');sq.push_back(0);cout << setw(10) << "步骤" << setw(10) << "状态栈" << setw(10) << "符号栈" << setw(10) << "输入串" << setw(25) << "动作说明" << endl;int s = 0;int oldstatus;char ch = inputStr.front();int c = getChar(ch);if (c == -1) {cout << "error" << endl;return;}while (lrtable[s][getChar(ch)] != "acc") {string sstr = lrtable[s][getChar(ch)];int s1 = 0;for (int i = 1; i < sstr.size(); i++) {s1 = s1 * 10 + (sstr[i] - '0');}s = s1;if (sstr == "-1") {cout << "error" << endl;return;}if (sstr.substr(0, 1) == "s") {cout << setw(10) << step << setw(10) << outPut(0) << setw(10) << outPut(1) << setw(10) << outPut(2) << setw(10) << "A" << "CTION[" << sq.back() << "," << ch << "]=S" << s << "," << "状态" << s << "入栈" << endl;sign.push_back(ch);inputStr.erase(inputStr.begin());sq.push_back(s);}else if (sstr.substr(0, 1) == "r") {string formu = ReProduction[s];string stop = project[s].first;int strlen = formu.size();int popnum = strlen - 3;int i = 0;for (vector<int>::reverse_iterator it = sq.rbegin(); it != sq.rend(); it++) {i++;if (i == popnum + 1) {oldstatus = *it;break;}}int r = s;for (int i = 0; i < state[oldstatus].size(); i++) {if (state[oldstatus][i].first == stop) {s = state[oldstatus][i].second;}}cout << setw(10) << step << setw(10) << outPut(0) << setw(10) << outPut(1) << setw(10) << outPut(2) << setw(10) << "r" << r << (string)":" + Production[r] + (string)"归约,GOTO{" << oldstatus << "," << 'E' << ")=" << s << "入栈" << endl;for (int i = 0; i < popnum; i++) {sq.pop_back();sign.pop_back();}sq.push_back(s);sign.push_back(stop[0]);}step++;s = sq.back();ch = inputStr.front();}cout << setw(10) << step << setw(10) << outPut(0) << setw(10) << outPut(1) << setw(10) << outPut(2) << setw(10) << "A" << "cc:分析成功" << endl;
}
void analyst() {init();string s;cout << "请输入字符串(+,-,*,/,E,i,(,),#):" << endl;cin >> s;for (int i = 0; i < s.size(); i++)inputStr.push_back(s[i]);if (inputStr.back() != '#')inputStr.push_back('#');check();
}int main() {Gramma();GrammaF();get_project();auto S = get_closure(Start);auto allclosure = get_all_closure(S);for (int i = 0; i < allclosure.size(); i++) {cout << "I" << i << ":" << endl;for (int j = 0; j < allclosure[i].size(); j++) {cout << allclosure[i][j].first << "->" << allclosure[i][j].second << endl;}}cout << table.size() << endl;cout << "\n转换关系:" << endl;for (map<pair<int, string>, int>::iterator it = table.begin(); it != table.end(); it++){cout << "[ " << it->first.first << ", " << it->first.second << " ] -> " << it->second << endl;}get_LRTable(allclosure);for (int i = 0; i < project.size(); i++) {cout << ReProduction[i] << endl;}for (int i = 0; i < allclosure.size(); i++) {for (int j = 0; j < t.size(); j++) {cout << lrtable[i][j] << " ";}cout << endl;}analyst();}
结果:
语法分析:
生成项目集
拓广文法
生成LR分析表
四、实验总结
通过这个实验,我认识到编译器的重要性和复杂性。在设计和构造编译器的过程中,我需要考虑语法分析、词法分析、语义分析等多个环节,同时还要处理错误处理、优化等问题。在实验中,我需要将学习到的几个章节的理论知识转化为实际可执行的代码。我学会了如何根据给定的文法和规则构建语法分析树,如何设计和实现词法分析器,如何进行语义分析和中间代码生成等。在设计和构造编译器的过程中,我遇到了很多困难和挑战,需要不断地debug,这有利于巩固了我的编程能力,提高我的设计思维和解决问题的能力。