zoukankan      html  css  js  c++  java
  • 简单的C语言编译器--词法分析器

    1. 定义词法单元Tag

      首先要将可能出现的词进行分类,可以有不同的分类方式。如多符一类:将所有逗号、分号、括号等都归为一类;或者一符一类:将每个符号单独归为一类。我这里采用的是一符一类的方式。C++代码如下:

        #ifndef TAG_H
        #define TAG_H
        
        // Integer tags identifying every kind of token the lexer can produce.
        // Each symbol gets its own tag ("one symbol, one class").
        namespace Tag {
        	// Reserved words
        	constexpr int INT = 1;
        	constexpr int BOOL = 2;
        	constexpr int MAIN = 3;
        	constexpr int IF = 4;
        	constexpr int ELSE = 5;
        	constexpr int FOR = 6;
        	constexpr int WHILE = 7;
        	constexpr int FALSE = 8;
        	constexpr int BREAK = 9;
        	constexpr int RETURN = 10;
        	constexpr int TRUE = 11;

        	// Operators
        	constexpr int NOT = 20;
        	constexpr int NE = 21;
        	constexpr int AUTOMINUS = 22;
        	constexpr int MINUS = 23;
        	constexpr int AUTOADD = 24;
        	constexpr int ADD = 25;
        	constexpr int OR = 26;
        	constexpr int AND = 27;
        	constexpr int MUTIPLY = 28;
        	constexpr int DIVIDE = 29;
        	constexpr int MOD = 30;
        	constexpr int EQ = 31;
        	constexpr int ASSIN = 32;
        	constexpr int GE = 33;
        	constexpr int GT = 34;
        	constexpr int LE = 35;
        	constexpr int LS = 36;

        	// Delimiters: ( ) are the "L" pair, [ ] the "M" pair, { } the "G" pair
        	constexpr int COMMA = 40;
        	constexpr int SEMICOLON = 41;
        	constexpr int LLBRACKET = 42;
        	constexpr int RLBRACKET = 43;
        	constexpr int LMBRACKET = 44;
        	constexpr int RMBRACKET = 45;
        	constexpr int LGBRACKET = 46;
        	constexpr int RGBRACKET = 47;

        	// Integer constant
        	constexpr int NUM = 50;

        	// Identifier
        	constexpr int ID = 60;

        	// Lexical error
        	constexpr int ERROR = 404;

        	// Empty (no token)
        	constexpr int EMPTY = 70;
        }
    
    #endif
    

    2. 具体步骤

    • 一个一个字符地扫描测试代码,忽略空白字符,遇到回车时,记录行数加1
    • 要进行区分标识符(即普通变量名字)和保留字
    • 因为将标识符和常数都各自归为一类,所以要有算法能够识别出一整个常数和完整的标识符
    • 加入适当的非法词检测

    3. 设计词法分析类

      设计一个词法分析器,当然要包括如何存储一个词法单元,如何扫描(scan)测试代码等,直接上代码:

    myLexer.h

        #ifndef MYLEXER_H
        #define MYLEXER_H
        
        #include <fstream>
        #include <string>
        #include <unordered_map>
        #include "tag.h"
        
        
        /*
         * 主要是定义基本的词法单元类,
         * 声明了词法分析类
         */
        
        // A lexical unit (token): the lexeme text plus its integer tag.
        class Word {
        	public:
        		// Creates a token with an empty lexeme and tag 0.
        		Word() = default;
        		// Creates a token with lexeme `s` and tag `t`.
        		Word(std::string s, int t) : lexeme(s), tag(t) {}
        		// Returns a copy of the lexeme text.
        		std::string getLexeme() const { return lexeme; }
        		// Returns the tag.
        		int getTag() const { return tag; }
        		// Replaces the tag.
        		void setTag(int t) { tag = t; }
        		// Replaces the lexeme text.
        		void setLexeme(std::string s) { lexeme = s; }
        	private:
        		std::string lexeme;
        		// Initialised in-class so a default-constructed Word never exposes
        		// an indeterminate tag.
        		int tag = 0;
        };
        
        // Lexical analyzer: turns the characters of an input file stream into
        // Word tokens, one token per scan() call.
        class Lexer {
        	public:
        		// Registers the reserved words and sets the initial scanner state.
        		Lexer();
        		// Adds `w` to the reserved-word table, keyed by its lexeme.
        		void reserve(Word w);
        		// Reads one character from `in` into the lookahead and reports
        		// whether it equals `c`; used for two-character operators.
        		bool readnext(char c, std::ifstream &in);
        		// Reads and returns the next token from `in`.
        		Word scan(std::ifstream &in);
        		// Returns the current line counter (starts at 1).
        		int getLine() const { return line; }
        	private:
        		// One-character lookahead shared between scan() and readnext().
        		char peek;
        		// Reserved words, looked up by lexeme to tell them from identifiers.
        		std::unordered_map<std::string, Word> words;
        		// Line counter, incremented by scan() when it sees a newline.
        		int line;
        };
        
        
        #endif
    

    myLexer.cpp

        #include <iostream>
        #include <cctype>
        #include <sstream>
        #include "myLexer.h"
        
        // Registers `w` in the reserved-word table under its own lexeme.
        // An entry that already exists for that lexeme is left untouched.
        void Lexer::reserve(Word w) {
        	const std::string key = w.getLexeme();
        	words.emplace(key, w);
        }
        
        // Starts with a blank lookahead on line 1 and registers every reserved
        // word so that scan() can tell keywords apart from identifiers.
        Lexer::Lexer() : peek(' '), line(1) {
        	struct Keyword {
        		const char *text;
        		int tag;
        	};
        	const Keyword keywords[] = {
        		{"int", Tag::INT},
        		{"bool", Tag::BOOL},
        		{"main", Tag::MAIN},
        		{"if", Tag::IF},
        		{"else", Tag::ELSE},
        		{"for", Tag::FOR},
        		{"while", Tag::WHILE},
        		{"break", Tag::BREAK},
        		{"return", Tag::RETURN},
        		{"true", Tag::TRUE},
        		{"false", Tag::FALSE},
        	};

        	for(const Keyword &kw : keywords)
        		reserve( Word(kw.text, kw.tag) );
        }
        
        // Helper for two-character operators such as >=, ++ and ==.
        //
        // Reads exactly one raw character from `in` into the lookahead `peek`.
        // Returns true when that character equals `c`; the lookahead is then
        // reset to a blank because the character has been used up by the
        // operator. Returns false otherwise, leaving the character in `peek`
        // for the next scan. At end of input the lookahead is reset to a blank
        // and false is returned.
        bool Lexer::readnext(char c, std::ifstream &in) {
        	// get() is unformatted input: unlike operator>> it never skips
        	// whitespace, so "= =" is not mistaken for "==".
        	if(!in.get(peek)) {
        		// Nothing was read; do not compare against the stale lookahead.
        		peek = ' ';
        		return false;
        	}
        	if(peek != c)
        		return false;
        	peek = ' ';
        	return true;
        }
        
        
        // Reads and returns the next token from `in`.
        //
        // Blanks, tabs and newlines are skipped, and `line` is incremented for
        // each newline skipped. The result is one of:
        //   - an operator or delimiter token (see the switch below);
        //   - Tag::NUM for a run of decimal digits; the lexeme is the digit
        //     string without redundant leading zeros;
        //   - the stored reserved word, or Tag::ID, for a letter followed by
        //     letters, digits or underscores;
        //   - Word("error", Tag::ERROR) for a lone '|' or '&', for a digit run
        //     followed by '.', or for any other unrecognised character;
        //   - Word("empty", Tag::EMPTY) when the input ends while skipping
        //     whitespace.
        // The lookahead `peek` always holds the first character that is not yet
        // part of a token; at end of input it is reset to a blank.
        Word Lexer::scan(std::ifstream &in) {
        	// Fetch one raw character into the lookahead. get() is used instead
        	// of operator>> so the result does not depend on the stream's skipws
        	// flag. On end of input the lookahead becomes a blank, so a stale
        	// character is never processed twice.
        	auto advance = [&]() -> bool {
        		if(in.get(peek))
        			return true;
        		peek = ' ';
        		return false;
        	};
        	// <cctype> classifiers need a value representable as unsigned char.
        	auto isDigit = [](char ch) {
        		return std::isdigit(static_cast<unsigned char>(ch)) != 0;
        	};
        	auto isIdentStart = [](char ch) {
        		return std::isalpha(static_cast<unsigned char>(ch)) != 0;
        	};
        	auto isIdentPart = [](char ch) {
        		return std::isalnum(static_cast<unsigned char>(ch)) != 0 || ch == '_';
        	};

        	// Skip whitespace, counting lines.
        	while(peek == ' ' || peek == '\t' || peek == '\n') {
        		if(peek == '\n')
        			++line;
        		if(!advance())
        			return Word("empty", Tag::EMPTY);
        	}

        	// Delimiters and operators.
        	switch(peek) {
        		case '!':
        			if(readnext('=', in))
        				return Word("!=", Tag::NE);
        			else
        				return Word("!", Tag::NOT);
        		case '-':
        			if(readnext('-', in))
        				return Word("--", Tag::AUTOMINUS);
        			else
        				return Word("-", Tag::MINUS);
        		case '+':
        			if(readnext('+', in))
        				return Word("++", Tag::AUTOADD);
        			else
        				return Word("+", Tag::ADD);
        		case '|':
        			if(readnext('|', in))
        				return Word("||", Tag::OR);
        			else
        				return Word("error", Tag::ERROR);
        		case '&':
        			if(readnext('&', in))
        				return Word("&&", Tag::AND);
        			else
        				return Word("error", Tag::ERROR);
        		case '*':
        			advance();
        			return Word("*", Tag::MUTIPLY);
        		case '/':
        			advance();
        			return Word("/", Tag::DIVIDE);
        		case '%':
        			advance();
        			return Word("%", Tag::MOD);
        		case '=':
        			if(readnext('=', in))
        				return Word("==", Tag::EQ);
        			else
        				return Word("=", Tag::ASSIN);
        		case '>':
        			if(readnext('=', in))
        				return Word(">=", Tag::GE);
        			else
        				return Word(">", Tag::GT);
        		case '<':
        			if(readnext('=', in))
        				return Word("<=", Tag::LE);
        			else
        				return Word("<", Tag::LS);
        		case ',':
        			advance();
        			return Word(",", Tag::COMMA);
        		case ';':
        			advance();
        			return Word(";", Tag::SEMICOLON);
        		case '(':
        			advance();
        			return Word("(", Tag::LLBRACKET);
        		case ')':
        			advance();
        			return Word(")", Tag::RLBRACKET);
        		case '[':
        			advance();
        			return Word("[", Tag::LMBRACKET);
        		case ']':
        			advance();
        			return Word("]", Tag::RMBRACKET);
        		case '{':
        			advance();
        			return Word("{", Tag::LGBRACKET);
        		case '}':
        			advance();
        			return Word("}", Tag::RGBRACKET);
        	}

        	// Integer constants. The digits are collected as text so that an
        	// arbitrarily long literal cannot overflow an int.
        	if(isDigit(peek)) {
        		std::string digits;
        		do {
        			digits += peek;
        		} while(advance() && isDigit(peek));

        		if(peek != '.') {
        			// Drop redundant leading zeros ("007" -> "7", "000" -> "0").
        			const std::string::size_type firstNonZero = digits.find_first_not_of('0');
        			if(firstNonZero == std::string::npos)
        				digits = "0";
        			else
        				digits.erase(0, firstNonZero);
        			return Word(digits, Tag::NUM);
        		}
        		// A '.' after the digits is not supported; fall through to the
        		// error path below, which consumes the '.'.
        	}

        	// Identifiers and reserved words.
        	if(isIdentStart(peek)) {
        		std::string name;
        		do {
        			name += peek;
        		} while(advance() && isIdentPart(peek));

        		// A single lookup decides between reserved word and identifier.
        		const auto found = words.find(name);
        		if(found != words.end())
        			return found->second;
        		return Word(name, Tag::ID);
        	}

        	// Anything else is an illegal character. It cannot be whitespace
        	// here because the skip loop above only exits on a non-blank
        	// lookahead. Consume it so the next call makes progress instead of
        	// reporting the same error forever.
        	advance();
        	return Word("error", Tag::ERROR);
        }
    
    

      设计完成后,自己写一个main函数,在while循环中调用scan函数,每次打印出Word内容,就能够得到词法分析的结果。

  • 相关阅读:
    pgspider sqlite mysql docker 镜像
    pgspider docker 镜像
    pgspider基于pg 的高性能数据可视化sql 集群引擎
    diesel rust orm 框架试用
    golang 条件编译
    Performance Profiling Zeebe
    bazel 学习一 简单java 项目运行
    一个好用node http keeplive agnet
    gox 简单灵活的golang 跨平台编译工具
    mailhog 作为smtp server mock工具
  • 原文地址:https://www.cnblogs.com/vachester/p/6884345.html
Copyright © 2011-2022 走看看