《编译原理》实验一:词法分析
语言:C++
环境:VS2019
实验要求:
1 分析
词法分析是编译过程的第一个阶段。
这个阶段的主要任务是:从左到右逐个字符地读入源程序,对构成源程序的字符流进行扫描和分解,从而识别出一个个单词。具体分为以下三步:
(1)获取单词:获取程序字符,并删除注释和过滤空格,本文从输入流中获取单词;
(2)识别单词:识别单词属性;
bool isIdentifier(string s);// true when s is an identifier
bool isKeywords(string s);// true when s is a reserved word (keyword)
bool isDigit(string s);// true when s is an integer literal
bool isOperator(string s);// true when the string s is an operator
bool isOperator(char c);// true when the character c is an operator
string result(string s);// builds the output text that corresponds to the token s
(3)输出结果:根据已有的单词符号和相应的类别码输出。
2 代码
2.1识别保留字
保留字又称关键字。指在高级语言中已经定义过的字,使用者不能再将这些字作为变量名或过程名使用。
// Reports whether s is one of the reserved words known to the lexer.
// The match is exact and case-sensitive against a fixed table.
bool isKeywords(string s)
{
	static vector<string> reservedWords = {
		"main", "int", "float", "double", "char",
		"if", "then", "else", "switch", "case",
		"break", "continue", "while", "do", "for" };

	for (const string& word : reservedWords)
	{
		if (word == s)
			return true;
	}
	return false;
}
2.2识别标识符
标识符由字母(A-Z,a-z)、数字(0-9)、下划线“_”组成,并且首字符不能是数字。
// Reports whether s is an identifier: it must not be a reserved word, its
// first character must be an ASCII letter or '_', and every following
// character must be an ASCII letter, a decimal digit or '_'.
bool isIdentifier(string s)
{
	// Reserved words are never identifiers.
	if (isKeywords(s))
		return false;

	auto isLetterOrUnderscore = [](char ch) {
		return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
	};

	// A leading digit (or any other character) disqualifies the word.
	if (!isLetterOrUnderscore(s[0]))
		return false;

	for (size_t pos = 1; pos < s.length(); ++pos)
	{
		const char ch = s[pos];
		const bool isDecimalDigit = (ch >= '0' && ch <= '9');
		if (!isLetterOrUnderscore(ch) && !isDecimalDigit)
			return false;
	}
	return true;
}
2.3识别整型数字
// Reports whether s is an integer literal, i.e. NUM = digit digit*:
// a non-empty run made up only of the characters '0'..'9'.
bool isDigit(string s)
{
	// An empty string has no leading digit, so it is not a number.
	if (s.empty())
		return false;

	for (char ch : s)
	{
		if (ch < '0' || ch > '9')
			return false;
	}
	return true;
}
2.4识别运算符
// Reports whether the string s is exactly one of the operators or
// delimiters in the table below (one- and two-character forms).
bool isOperator(string s)
{
	static vector<string> operatorTable = {
		"=", "+", "-", "*", "/", "<", "<=", "==", "!=",
		">", ">=", ";", "(", ")", "?", ":", "," };

	for (const string& op : operatorTable)
	{
		if (op == s)
			return true;
	}
	return false;
}
// Reports whether the single character c is an operator or delimiter.
// Only one-character forms are listed here; '!' is therefore not accepted,
// because it is valid only as the first half of "!=".
bool isOperator(char c)
{
	static const string operatorChars = "=+-*/<>;()?:,";
	return operatorChars.find(c) != string::npos;
}
3 可视化
本文建立MFC项目实现词法分析可视化
3.1建立MFC项目
打开VS2019,创建新项目,在弹出的菜单搜索MFC
确定项目名称、选择存放位置
应用程序类型,选择“基于对话框”
用户界面功能,勾选“最大化”、“最小化”
点击完成后,如图
3.2创建对话框
删掉对话框的三个控件:
将左侧工具箱中“Edit Control”、“Button”、“Static Text”拖出并摆放如图位置
选中“Button1”,修改“描述文字”为“确定”
同样操作修改:
3.3修改控件属性
(1)左侧编辑框
ID:IDC_EDIT_IN
多行:True
想要返回:True
(2)右侧编辑框
ID:IDC_EDIT_OUT
多行:True
想要返回:True
(3)右击右侧编辑框->添加变量,取名“m_edit”
(4)选中最外层控件,修改最大化框和最小化框为True
3.4封装类
这一过程是封装前面写的词法分析核心代码,加入MFC工程。
右击项目工程名->添加->类
取个名字,确定
此时,在解决方案资源管理器里面,头文件和源文件分别多出了LX.h和LX.cpp
复制LX.h代码
#pragma once
# include<algorithm>
# include<fstream>
# include<iostream>
# include<map>
# include<sstream>//流对象
# include<string>
# include<vector>
using namespace std;
// Lexical analyser: token classification predicates plus the routine that
// processes a whole input file.
class LX
{
public:
bool isIdentifier(string s);// true when s is an identifier
bool isKeywords(string s);// true when s is a reserved word (keyword)
bool isDigit(string s);// true when s is an integer literal
bool isOperator(string s);// true when the string s is an operator
bool isOperator(char c);// true when the character c is an operator
string result(string s);// builds the output text that corresponds to the token s
void main1(string file, string file1);// main processing routine
};
复制LX.cpp代码
#include "pch.h"
#include "LX.h"
using namespace std;
// Reports whether s is one of the reserved words known to the lexer.
// The match is exact and case-sensitive against a fixed table.
bool LX::isKeywords(string s)
{
	static vector<string> reservedWords = {
		"main", "int", "float", "double", "char",
		"if", "then", "else", "switch", "case",
		"break", "continue", "while", "do", "for" };

	const bool listed =
		find(reservedWords.begin(), reservedWords.end(), s) != reservedWords.end();
	return listed;
}
// Reports whether the single character c is an operator or delimiter.
// Only one-character forms are listed; '!' is not among them because it is
// valid only as the first half of "!=".
bool LX::isOperator(char c)
{
	switch (c)
	{
	case '=': case '+': case '-': case '*': case '/':
	case '<': case '>':
	case ';': case '(': case ')': case '?': case ':': case ',':
		return true;
	default:
		return false;
	}
}
// Reports whether s is an integer literal, i.e. NUM = digit digit*:
// a non-empty run made up only of the characters '0'..'9'.
bool LX::isDigit(string s)
{
	// An empty string has no leading digit, so it is not a number.
	if (s.empty())
		return false;

	size_t pos = 0;
	while (pos < s.length() && s[pos] >= '0' && s[pos] <= '9')
		++pos;

	// Every character was a digit only if the scan reached the end.
	return pos == s.length();
}
// Reports whether the string s is exactly one of the operators or
// delimiters in the table below (one- and two-character forms).
bool LX::isOperator(string s)
{
	static vector<string> operatorTable = {
		"=", "+", "-", "*", "/", "<", "<=", "==", "!=",
		">", ">=", ";", "(", ")", "?", ":", "," };

	return find(operatorTable.begin(), operatorTable.end(), s) != operatorTable.end();
}
// Identifier test. The lab requirement is ID = letter (letter | digit)*;
// this implementation additionally accepts '_' in any position.
// A reserved word is never reported as an identifier.
bool LX::isIdentifier(string s)
{
	if (isKeywords(s))
		return false;

	auto isLetterOrUnderscore = [](char ch) {
		return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
	};

	// The first character may not be a digit (or anything else).
	if (!isLetterOrUnderscore(s[0]))
		return false;

	for (size_t pos = 1; pos < s.length(); ++pos)
	{
		const char ch = s[pos];
		const bool isDecimalDigit = (ch >= '0' && ch <= '9');
		if (!isLetterOrUnderscore(ch) && !isDecimalDigit)
			return false;
	}
	return true;
}
// Classifies the token s and returns its printable record in the form
// "(category--code,lexeme)". Categories are tried in this order:
// identifier, reserved word, integer constant, operator/delimiter.
// Returns "Error" when s belongs to none of them.
string LX::result(string s)
{
	// 1. Identifier (category code 10).
	if (isIdentifier(s))
		return "(标识符--10," + s + ")";

	// 2. Reserved word. Only some reserved words have a category code; the
	// others are printed with an empty code. The table is built once and is
	// read with find() so that a lookup never inserts new entries.
	static const map<string, string> keyMap = {
		{ "int", "1" }, { "float", "2" }, { "if", "3" },
		{ "switch", "4" }, { "while", "5" }, { "do", "6" } };
	if (isKeywords(s))
	{
		const auto code = keyMap.find(s);
		return "(保留字--" + (code != keyMap.end() ? code->second : string()) + "," + s + ")";
	}

	// 3. Integer constant (category code 11).
	if (isDigit(s))
		return "(整型常量--11," + s + ")";

	// 4. Operator or delimiter. The table holds exactly the strings accepted
	// by isOperator(string), so membership here is the operator test.
	// "==" is the equality operator, so it is labelled as such rather than
	// as an assignment operator.
	static const map<string, string> opeMap = {
		{ "+", "(加号--13,+)" },
		{ "-", "(减号--14,-)" },
		{ "*", "(星号--15,*)" },
		{ "/", "(除号--16,/)" },
		{ "=", "(等号--17,=)" },
		{ "<", "(小于号--20,<)" },
		{ "<=", "(小于等于号--21,<=)" },
		{ "==", "(等于号--22,==)" },
		{ "!=", "(不等于号--23,!=)" },
		{ ">", "(大于号--24,>)" },
		{ ">=", "(大于等于号--25,>=)" },
		{ ";", "(分号--26,;)" },
		{ "(", "( 左括号--27,( )" },
		{ ")", "(右括号--28,) )" },
		{ "?", "(问号--29,?)" },
		{ ":", "(冒号--30,:)" },
		{ ",", "(逗号--31,,)" } };
	const auto op = opeMap.find(s);
	if (op != opeMap.end())
		return op->second;

	return "Error";
}
// Main processing routine: tokenises the text file `file` and writes one
// result() record per token, one per line, to `file1`.
//
// file  - path of the source text to analyse.
// file1 - path of the output file; an empty value falls back to
//         "Result.txt".
//
// Each input line is split on whitespace into words. A word is then cut at
// every operator/delimiter, so "a<=b;" yields a, <=, b and ;. Text between
// operators is passed to result() as one token, including the text after
// the last operator and words that contain no operator at all.
void LX::main1(string file, string file1)
{
	ifstream input(file);
	ofstream output(file1.empty() ? string("Result.txt") : file1, ios::out);

	string line;
	while (getline(input, line)) // one line per iteration until EOF
	{
		// Extract whitespace-separated words from the current line.
		istringstream lineStream(line);
		string word;
		while (lineStream >> word)
		{
			size_t tokenStart = 0; // start of the pending non-operator text
			size_t pos = 0;
			while (pos < word.length())
			{
				// Try the two-character operators ("<=", ">=", "==", "!=")
				// first, so that "<=" is not split into "<" and "=" and
				// "!=" is recognised although '!' alone is not an operator.
				size_t opLength = 0;
				if (pos + 1 < word.length() && isOperator(word.substr(pos, 2)))
					opLength = 2;
				else if (isOperator(word[pos]))
					opLength = 1;

				if (opLength == 0)
				{
					++pos; // ordinary character: extends the pending token
					continue;
				}

				// Emit the text collected before the operator, if any.
				if (pos > tokenStart)
					output << result(word.substr(tokenStart, pos - tokenStart)) << '\n';
				output << result(word.substr(pos, opLength)) << '\n';

				pos += opLength;
				tokenStart = pos;
			}

			// Emit whatever follows the last operator (or the whole word
			// when it contains no operator).
			if (tokenStart < word.length())
				output << result(word.substr(tokenStart)) << '\n';
		}
	}
	output << endl;
	// Both streams are closed by their destructors.
}
3.5编写代码
双击对话框中“确定”
在void CLexerDlg::OnBnClickedButton1()里添加代码:
// TODO: 在此添加控件通知处理程序代码
//定义字符串in存储输入框和输出框的内容
CString in, out;
GetDlgItemText(IDC_EDIT_IN, in);
//将输入框内容存入TestData.txt,Result.txt文件中
CString filename1("Result.txt");
CStdioFile myFile1;
CFileException fileException1;
if (myFile1.Open(filename1, CFile::typeText | CFile::modeCreate | CFile::modeReadWrite), &fileException1)
{
myFile1.WriteString(in);
}
else
{
TRACE("Can't open file %s,error=%u
", filename1, fileException1.m_cause);
}
myFile1.Close();
CString filename("TestData.txt");
CStdioFile myFile;
CFileException fileException;
if (myFile.Open(filename, CFile::typeText | CFile::modeCreate | CFile::modeReadWrite), &fileException)
{
myFile.WriteString(in);
}
else
{
TRACE("Can't open file %s,error=%u
", filename, fileException.m_cause);
}
myFile.Close();
//词法分析
LX a;
string file = ("TestData.txt");
string file1 = ("Result.txt");
a.main1(file,file1);
//将Result.txt文件内容打印到输出框
//CStdioFile myfile1;
//CString fileText;
//CString flieLine;
ifstream myfile1;
char buffer[256];
CString flieLine(_T(""));
myfile1.open("Result.txt");
//myfile1.Open(L"Result.txt", CFile::modeReadWrite);
while (myfile1.getline(buffer, 256, '
'))
{
strcat_s(buffer, 256, "
");
flieLine += buffer;
}
m_edit.SetWindowText(flieLine);
myfile1.close();
在LexerDlg.cpp前面添加头文件#include "LX.h"
注意:如图,可能会出现红色波浪线,一般是由于修改EditControl的ID,系统没有更新,忽略即可。