zoukankan      html  css  js  c++  java
  • 简单的C语言编译器--语法分析器

      语法分析算是最难的一部分了。总而言之,语法分析就是先设计一系列语法,然后再用设计好的语法去归约词法分析中的结果。最后将归约过程打印出来,或者生成抽象语法树。

    1. 设计文法

    以下是我的文法(引入的M和N是方便以后的语义分析):

    1.1、基本框架

    Program -> Type main() Block
    Type -> int | bool
    Block -> { Stmts return Num ; }
    Decl -> Type Array ;
    Array -> Identifier [ Num ] | Identifier [ Identifier ] | Identifier
    Stmts -> Stmts M Stmt | Stmt
    

    1.2、标识符和常数

    Letter -> A | B | C | D | E | F | G | H | I | J | K | L | M | N 
        | O | P | Q | R | S | T | U | V | W | X | Y | Z | a | b
        | c | d | e | f | g | h | i | j | k | l | m | n | o | p
        | q | r | s | t | u | v | w | x | y | z | _
    Digit -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
    Non_digit -> 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
    Num -> Num Digit | Non_digit
    Identifier -> Identifier Digit | Identifier Letter | Letter
    Bool_value -> true | false
    

    1.3、运算符

    Self_op -> ++ | --
    HLogic_op -> &&
    LLogic_op -> ||
    HMath_op -> * | / | %
    LMath_op -> + | - 
    Judge_op -> == | != | >= | <= | > | <
    

    1.4、语句块框架

    Stmt -> Assignment ;
    	| Decl
        | if ( Bool ) M Stmt
        | if ( Bool ) M Stmt N else M Stmt
        | while M ( Bool ) M Stmt
        | for ( Fora ; M Forb ; Forc ) M Stmt
        | { Stmts }
    Fora -> Assignment | ε
    Forb -> Bool | ε
    Forc -> ForAssignment | ε
    

    1.5、赋值语句

    Assignment -> LArray = Bool ; | LArray Self_op ; | Self_op LArray ; 
    
    ForAssignment -> LArray = Bool ; | LArray Self_op ; | Self_op LArray ; 
    

    1.6、条件语句

    Factor -> ( Bool ) | Array | Num | Bool_value | ! ( Bool )
    HExpr -> HExpr HMath_op Factor | Factor
    LExpr -> LExpr LMath_op HExpr | HExpr
    Rel -> Rel Judge_op LExpr | LExpr
    HRel -> HRel HLogic_op M Rel | Rel
    Bool -> Bool LLogic_op M HRel | HRel
    

    1.7、辅助

    M -> ε 
    N -> ε	
    

    注意,标识符和常数那块是为了词法分析构造DFA,实际语法分析的时候将所有标识符当做Identifier,把所有的常数当成Num即可。

    2. 代码部分

      代码的关键是用getTable.h/cpp实现语法分析表,然后用状态栈分析测试的语法是否能够被归约成功,并且得到归约的顺序,即语法分析树。myParser.h/cpp便是利用得到的分析表进行栈分析,但是由于是制导翻译,所以分析这块和语义翻译一起做,所以下一节再介绍。

    getTable.h

    #ifndef MYPARSER_H
    #define MYPARSER_H
    
    #include <fstream>
    #include <unordered_map>
    #include <unordered_set>
    #include <string>
    #include <vector>
    #include <cstddef>
    #include <functional>
    
    
    struct Project {
    	int pro_num;  //表示第pro_num个产生式,0表示拓展的产生式
    	int dot_position;  //当前点的位置,位置的取值比产生式右端长度大1
    	std::unordered_set<std::string> successors;  //后继符
    	bool operator==(Project &right);
    };
    
    struct Grammer {
    	std::string left;
    	std::vector<std::string> right;
    };	
    
    /*
     * hash函数返回哈希值
     * eq则为set提供相等操作
     * 这样就可以把Project作为set的关键词
     */
    
    
    size_t hasher(const Project &p);
    bool eq(const Project &p1, const Project &p2);
    
    
    class GetTable {
    	public:
    		typedef std::unordered_set<Project, decltype(hasher) *, decltype(eq) *> Status; 
    		typedef std::unordered_map<int, std::unordered_map<std::string, std::string>> Atable;
    		typedef std::unordered_map<int, std::unordered_map<std::string, int>> Gtable;
    		GetTable();
    		void get_grammer(std::ifstream &in); //从文本中读取文法规则
    		void get_first();	//获得first集合
    		void get_follow(); //获得follow集合
    		bool inVT(std::string s); //判断在V还是T里,1为V
    		void judge_first(std::vector<std::string> s , std::unordered_set<std::string> & result); //判断串s的first集合是否包含ε
    		void get_closure(Status &p); //求项目闭包
    		bool judge_repeat(Status s1, Status s2); //判断两个状态是否重复
    		bool judge_conflict(Status & s, std::unordered_set<std::string> &result); //判断一个状态是否有移进-归约冲突
    		void get_status(); //获得状态集合
    		void print(std::ofstream &o1, std::ofstream &o2); //打印归约产生式子
    	private:
    		std::unordered_map<std::string,std::unordered_set<int>> index; //存储各非终结符的定义的序列号集合
    		std::vector<Grammer> G;  //存储文法
    		std::vector<std::string> V, T; //存储非终结符和终结符
    		std::unordered_map<std::string, std::unordered_set<std::string>> first, follow;  //存储first和follow集合
    		std::unordered_map<int, Status> statuses; //规范状态集
    		Atable action;
    		Gtable goTo;
    
    };
    
    #endif
    

    getTable.cpp

    #include <iostream>
    #include "getTable.h"
    
    
    bool Project::operator==(Project &right) {
    	if(this->pro_num == right.pro_num && this->dot_position == right.dot_position && this->successors.size() == right.successors.size()) {
    		for(auto t : right.successors) {
    			if(this->successors.find(t) == this->successors.end())
    				return false;
    		}
    
    		return true;
    	}
    
    	return false;
    }
    
    
    size_t hasher(const Project &p) {
    	return std::hash<int>() (p.pro_num*p.dot_position);
    }
    
    
    bool eq(const Project &p1, const Project &p2) {
    	if(p1.pro_num == p2.pro_num && p1.dot_position == p2.dot_position) 
    		return true;
    	return false;
    }
    
    
    GetTable::GetTable(){
    	//初始化非终结符和终结符
    	V = {"S","Program","Type","Block","Stmts","Decl","Stmt","ForAssignment","Assignment","Bool", "Rel", "LExpr","HExpr","Factor","Self_op","HLogic_op","LLogic_op","HMath_op","LMath_op","Judge_op","Bool_value","Array","Fora","Forb","Forc","HRel","LArray","M","N"};
    
    	T = {"(" ,")","main","int","bool","return",";","{","}","if","else","while","for","Identifier","Num","[","]","true","false","==","!=",">=","<=",">","<","+","-","*","/","%","||","&&","++","--","!","-",";","=","ε"};
    
    	std::unordered_set<std::string> empty_set;
    	std::unordered_set<int> empty_set_int;
    
    	//初始化first和follow集
    	for(auto i : V) { 
    		first.insert({i, empty_set});
    		follow.insert({i, empty_set});
    		index.insert({i,empty_set_int});
    	}
    
    
    	follow["Program"].insert("#");
    }
    
    
    void GetTable::get_grammer(std::ifstream &in) {
    	std::string tmp;
    	Grammer gtmp;
    	gtmp.left = "S";
    	gtmp.right = {"Program"};
    	G.push_back(gtmp);
    
    	while(in >> tmp) {
    		gtmp.left = tmp;
    		gtmp.right.clear();
    		in >> tmp >> tmp;
    		while(tmp != "#") {
    			gtmp.right.push_back(tmp);	
    			in >> tmp;
    		}
    		G.push_back(gtmp);
    	}
    	
    	for(int i = 0; i < static_cast<int>(G.size()); i++) 
    		index[G[i].left].insert(i);	
    }
    
    bool GetTable::inVT(std::string s) {
    	for(auto i : V) {
    		if(i == s)
    			return true;
    	}
    
    	return false;
    }
    
    
    void GetTable::get_first() {
    	bool change = true; //表示若改动了一处,则需要重新便利
    	bool is_empty; //表示产生式右端为空串
    	int t; 
    	//循环,直到没有改动为止,即change = false
    	while(change) {
    		change = false;
    		//循环每个文法
    		for(auto &g : G) {
    			is_empty = true;
    			t = 0;
    			while(is_empty && t < static_cast<int>(g.right.size())) {
    				is_empty = false;
    				if(!inVT(g.right[t])) {
    					if(first[g.left].find(g.right[t]) == first[g.left].end()) {
    						first[g.left].insert(g.right[t]);
    						change = true;
    					}
    					continue;
    				}  
    				for(auto i : first[g.right[t]]) {
    					if(first[g.left].find(i) == first[g.left].end()) {
    						first[g.left].insert(i);
    						change = true;
    					}
    				}
    
    				if(first[g.right[t]].find("ε") != first[g.right[t]].end()) {
    					is_empty = true;
    					t++;
    				}
    			}
    			if(t == static_cast<int>(g.right.size()) && first[g.left].find("ε") == first[g.left].end()) {
    				first[g.left].insert("ε");
    				change = true;
    			}
    		}
    	}
    
    	first.erase("S");
    	
    }
    
    void GetTable::judge_first(std::vector<std::string> s, std::unordered_set<std::string> &result) {
    	int count = 0;
    	for(auto i : s) {
    		if(!inVT(i)) {
    			result.insert(i);
    			break;
    		}
    		if(first[i].find("ε") == first[i].end()) {
    			result.insert(first[i].begin(),first[i].end());
    			break;
    		} 
    
    		result.insert(first[i].begin(), first[i].end());
    		result.erase("ε");
    		count++;
    	
    	}
    
    	if(count == static_cast<int>(s.size()))
    		result.insert("ε");
    }
    
    void GetTable::get_follow() {
    	//与求first集类似,设置change
    	bool change = true;
    	while(change) {
    		change = false;
    		//循环每个文法
    		std::vector<std::string>::iterator search_next; //向后扫描指针
    		for(auto &g : G) {
    			for(auto it = g.right.begin(); it != g.right.end(); it++) {
    				if(!inVT(*it)) 
    					continue;
    				auto original = follow[*it].size();
    				search_next = it + 1;
    				if(search_next != g.right.end()) {
    					std::vector<std::string> tmp(search_next,g.right.end());	
    					std::unordered_set<std::string> stmp;
    					judge_first(tmp,stmp);
    					follow[*it].insert(stmp.begin(),stmp.end());
    					if(stmp.find("ε") != stmp.end()) {
    						follow[*it].erase("ε");
    						follow[*it].insert(follow[g.left].begin(),follow[g.left].end());
    					} 
    					if(original < follow[*it].size())
    						change = true;
    				} else {
    					follow[*it].insert(follow[g.left].begin(), follow[g.left].end());
    					if(original < follow[*it].size())
    						change = true;
    				}
    			}
    		}
    	}
    
    	follow.erase("S");
    }
    
    void GetTable::get_closure(Status &p) {
    	bool change = true;
    	Status pptmp;
    	while(change) {
    		change = false;
    		pptmp = p;
    		for(auto &pro : pptmp) {
    			if(pro.dot_position == static_cast<int>(G[pro.pro_num].right.size()))
    				continue;
    			std::string V = G[pro.pro_num].right[pro.dot_position];
    			if(!inVT(V)) 
    				continue;
    			std::unordered_set<std::string> new_successor;
    
    			//求新项目的后继符集
    			if(pro.dot_position == static_cast<int>(G[pro.pro_num].right.size()) - 1)
    				new_successor = pro.successors;
    			else {
    				std::vector<std::string> vtmp(G[pro.pro_num].right.begin()+pro.dot_position+1,G[pro.pro_num].right.end()); 
    				judge_first(vtmp, new_successor);
    				if(new_successor.find("ε") != new_successor.end()) {
    					new_successor.insert(pro.successors.begin(),pro.successors.end());
    					new_successor.erase("ε");
    				}
    			}
    			Project ptmp;
    			for(auto &i : index[V]) {
    				ptmp.pro_num = i;
    				ptmp.dot_position = 0;
    				ptmp.successors = new_successor;
    				if(p.find(ptmp) == p.end()) {
    					p.insert(ptmp);
    					change = true;
    				}
    				else {
    					auto original = p.find(ptmp)->successors.size(); 
    					ptmp.successors.insert(p.find(ptmp)->successors.begin(),p.find(ptmp)->successors.end());
    					p.erase(*p.find(ptmp));
    					p.insert(ptmp);
    
    					if(original < ptmp.successors.size()) 
    						change = true;
    				}	
    			}
    		}
    	}
    }
    
    bool GetTable::judge_repeat(Status s1, Status s2) {
    	if(s1.size() == s2.size()) {
    		for(auto pros : s1) {
    			if(s2.find(pros) == s2.end())
    				return false;	
    			else {
    				Project tmp = *(s2.find(pros));
    				if(!(tmp == pros))
    					return false;
    			}
    		}
    		return true;
    	}
    	return false;
    }
    
    
    bool GetTable::judge_conflict(Status & s, std::unordered_set<std::string> &result) {
    	bool flag = false;
    	std::unordered_set<std::string> tmp;
    	for(auto pro : s) {
    		if(pro.dot_position == static_cast<int>(G[pro.pro_num].right.size())) 
    			tmp.insert(pro.successors.begin(),pro.successors.end());
    	}
    
    	for(auto pro : s) {
    		if(pro.dot_position < static_cast<int>(G[pro.pro_num].right.size())) {
    			std::string next = G[pro.pro_num].right[pro.dot_position];
    			if(tmp.find(next) != tmp.end()) {
    				result.insert(next);
    				flag = true;
    			}
    		}
    	}
    
    	return flag;
    
    }
    	
    
    void GetTable::get_status() {
    	//初始化第一个状态0
    	int t = 0;
    	Project ptmp;
    	ptmp.dot_position = 0;
    	ptmp.pro_num = 0;
    	ptmp.successors = {"#"};
    	Status tmp_status(10,hasher,eq);
    	tmp_status.insert(ptmp);
    	get_closure(tmp_status);
    
    	statuses.insert({t,tmp_status});
    
    	//获取状态集合
    	bool change = true;
    	std::unordered_set<int> record; //记录已经求过的状态
    	std::unordered_map<int, Status> sstmp;
    	std::unordered_set<std::string> conflict;
    
    	while(change) {
    		change = false;
    		sstmp = statuses;
    		
    		for(auto &sta : sstmp) {
    			if(record.find(sta.first) != record.end()) 
    				continue;
    			record.insert(sta.first);
    			//judge_conflict(sta.second,conflict);
    			/*
    			 * 开始球状态转移,用一个set<string>记录被转移过的状态
    			 */
    			std::unordered_set<std::string> record_status;
    			for(auto pros : sta.second) {
    				if(G[pros.pro_num].right[0] == "ε" || pros.dot_position == static_cast<int>(G[pros.pro_num].right.size())) {
    					for(auto sucess : pros.successors) {  
    						if(action[sta.first].find(sucess) == action[sta.first].end())
    						action[sta.first][sucess] = "r" + std::to_string(pros.pro_num);
    					}
    					continue;
    				}
    				std::string trans = G[pros.pro_num].right[pros.dot_position];
    				if(record_status.find(trans) != record_status.end()) 
    					continue;
    				record_status.insert(trans);
    				tmp_status.clear();
    				ptmp.pro_num = pros.pro_num;
    				ptmp.dot_position = pros.dot_position + 1;
    				ptmp.successors = pros.successors;
    				tmp_status.insert(ptmp);
    				for(auto protmp : sta.second) {
    					if(G[protmp.pro_num].right[protmp.dot_position] == trans && !(protmp == pros)) {
    						ptmp.pro_num = protmp.pro_num;
    						ptmp.dot_position = protmp.dot_position + 1;
    						ptmp.successors = protmp.successors;
    						tmp_status.insert(ptmp);
    					}
    				}
    				get_closure(tmp_status);
    				bool flag = true;
    				for(auto s : sstmp) {
    					if(judge_repeat(s.second,tmp_status)) {
    						if(inVT(trans))
    							goTo[sta.first][trans] = s.first;
    						else
    							action[sta.first][trans] = "s" + std::to_string(s.first);
    						flag = false;
    						break;
    					}							
    				}
    				if(!flag) 
    					continue;
    				statuses.insert({++t, tmp_status});
    				change = true;
    
    				//填写goTo表和action表
    				if(inVT(trans)) 
    					goTo[sta.first][trans] = t;
    				else 
    					action[sta.first][trans] = "s" + std::to_string(t);
    			}
    		}
    	}
    	//添加acc
    	action[goTo[0]["Program"]]["#"] = "acc";
    
    }
    
    
    void GetTable::print(std::ofstream &o1, std::ofstream &o2) {
    	for(auto f1 : action) {
    		for(auto f2 : f1.second)
    			o1 << f1.first << " " << f2.first << " " << f2.second << std::endl;
    	}
    
    	for(auto f1 : goTo) {
    		for(auto f2 : f1.second)
    			o2 << f1.first << " " << f2.first << " " << f2.second << std::endl;
    	}
    }
    
    
    int main() {
    	GetTable test;
    	std::ifstream gin("grammer.txt");
    	std::ifstream lin("lexer.out");
    	std::ofstream action("action.out");
    	std::ofstream goTo("goto.out");
    	test.get_grammer(gin);
    	test.get_first();
    	test.get_follow();
    	test.get_status();
    	test.print(action,goTo);
    
    
    
    	gin.close();
    	lin.close();
    	action.close();
    	goTo.close();
    	return 0;
    }
    
  • 相关阅读:
    汉语6级试卷(转)
    看《第一滴血4》
    又爱又恨奥沙利文
    最爱“剁椒鱼头”
    中国移动用户请注意长途电话包有陷阱
    推荐一个免费原版KTV / MTV下载网站
    传说中的人品问题
    信息更改致乒乓球爱好者
    贴几张我家乐乐的照片
    Web Parts: From SharePoint to ASP.NET 2.0
  • 原文地址:https://www.cnblogs.com/vachester/p/6884350.html
Copyright © 2011-2022 走看看