zoukankan      html  css  js  c++  java
  • 简单的C语言编译器--词法分析器

    1. 定义词法单元Tag

      首先要将可能出现的词进行分类,可以有不同的分类方式。如多符一类:将所有逗号、分号、括号等都归为一类;或者一符一类:将每一个符号单独归为一类。我这里采用的是一符一类的方式。C++代码如下:

        #ifndef TAG_H
        #define TAG_H
        
        // Integer codes identifying every kind of lexical unit the lexer can
        // produce. Each symbol has its own code (one symbol, one category).
        namespace Tag {
            // Reserved words
            constexpr int INT    = 1;
            constexpr int BOOL   = 2;
            constexpr int MAIN   = 3;
            constexpr int IF     = 4;
            constexpr int ELSE   = 5;
            constexpr int FOR    = 6;
            constexpr int WHILE  = 7;
            constexpr int FALSE  = 8;
            constexpr int BREAK  = 9;
            constexpr int RETURN = 10;
            constexpr int TRUE   = 11;

            // Operators
            constexpr int NOT       = 20;
            constexpr int NE        = 21;
            constexpr int AUTOMINUS = 22;
            constexpr int MINUS     = 23;
            constexpr int AUTOADD   = 24;
            constexpr int ADD       = 25;
            constexpr int OR        = 26;
            constexpr int AND       = 27;
            constexpr int MUTIPLY   = 28;
            constexpr int DIVIDE    = 29;
            constexpr int MOD       = 30;
            constexpr int EQ        = 31;
            constexpr int ASSIN     = 32;
            constexpr int GE        = 33;
            constexpr int GT        = 34;
            constexpr int LE        = 35;
            constexpr int LS        = 36;

            // Delimiters
            constexpr int COMMA     = 40;
            constexpr int SEMICOLON = 41;
            constexpr int LLBRACKET = 42;
            constexpr int RLBRACKET = 43;
            constexpr int LMBRACKET = 44;
            constexpr int RMBRACKET = 45;
            constexpr int LGBRACKET = 46;
            constexpr int RGBRACKET = 47;

            // Integer constant
            constexpr int NUM = 50;

            // Identifier
            constexpr int ID = 60;

            // Lexical error
            constexpr int ERROR = 404;

            // Empty (no token)
            constexpr int EMPTY = 70;
        }
    
    #endif
    

    2. 具体步骤

    • 一个一个字符地扫描测试代码,忽略空白字符,遇到回车时,记录行数加1
    • 要进行区分标识符(即普通变量名字)和保留字
    • 因为将标识符和常数各自归为一类,所以要有算法能够识别出一个完整的常数和一个完整的标识符
    • 加入适当的非法词检测

    3. 设计词法分析类

      设计一个词法分析器,当然要包括如何存储一个词法单元,如何扫描(scan)测试代码等,直接上代码:

    myLexer.h

        #ifndef MYLEXER_H
        #define MYLEXER_H
        
        #include <fstream>
        #include <string>
        #include <unordered_map>
        #include "tag.h"
        
        
        /*
         * 主要是定义基本的词法单元类,
         * 声明了词法分析类
         */
        
        // Stores one lexical unit: the token text (lexeme) together with the
        // integer tag that classifies it.
        class Word {
            public:
                // Creates a word with an empty lexeme and tag 0, so a
                // default-constructed Word never exposes an indeterminate tag.
                Word() = default;
                // Creates a word with lexeme `s` and tag `t`.
                Word(std::string s, int t) : lexeme(s), tag(t) {}
                // Returns a copy of the token text.
                std::string getLexeme() const { return lexeme; }
                // Returns the tag classifying this token.
                int getTag() const { return tag; }
                // Replaces the tag with `t`.
                void setTag(int t) { tag = t; }
                // Replaces the token text with `s`.
                void setLexeme(std::string s) { lexeme = s; }
            private:
                std::string lexeme;
                int tag = 0;
        };
        
        // Lexical analyser: turns the characters of an input file stream into
        // Word tokens, one token per call to scan().
        class Lexer {
            public:
                // Defined out of line.
                Lexer();
                // Stores `w` in the word table, keyed by its lexeme.
                void reserve(Word w);
                // Advances the lookahead by one read from `in` and reports
                // whether the character read equals `c`.
                bool readnext(char c, std::ifstream &in);
                // Returns the next token read from `in`.
                Word scan(std::ifstream &in);
                // Returns the current value of the line counter.
                int getLine() const { return line; }
            private:
                // One-character lookahead; a blank means nothing is pending.
                char peek = ' ';
                // Word table, looked up by lexeme.
                std::unordered_map<std::string, Word> words;
                // Line counter, starting at 1.
                int line = 1;
        };
        
        
        #endif
    

    myLexer.cpp

        #include <iostream>
        #include <cctype>
        #include <sstream>
        #include "myLexer.h"
        
        // Registers `w` in the word table under its own lexeme. If an entry
        // with that lexeme is already present it is left untouched.
        void Lexer::reserve(Word w) {
            const std::string key = w.getLexeme();
            words.emplace(key, w);
        }
        
        // Sets up a fresh lexer: registers every reserved word so that scan()
        // can tell keywords from ordinary identifiers, then resets the
        // lookahead to a blank and the line counter to 1.
        Lexer::Lexer() {
            struct Keyword {
                const char *text;
                int tag;
            };
            const Keyword keywords[] = {
                { "int",    Tag::INT    },
                { "bool",   Tag::BOOL   },
                { "main",   Tag::MAIN   },
                { "if",     Tag::IF     },
                { "else",   Tag::ELSE   },
                { "for",    Tag::FOR    },
                { "while",  Tag::WHILE  },
                { "break",  Tag::BREAK  },
                { "return", Tag::RETURN },
                { "true",   Tag::TRUE   },
                { "false",  Tag::FALSE  },
            };

            for (const Keyword &kw : keywords)
                reserve( Word(kw.text, kw.tag) );

            line = 1;
            peek = ' ';
        }
        
        /*
         * Helper for two-character operators such as >=, ++ and ==.
         *
         * Reads exactly one raw character from `in` into the lookahead and
         * returns true when it equals `c`; in that case the lookahead is reset
         * to a blank because the character has been consumed as part of the
         * operator. When it differs, false is returned and the character stays
         * in the lookahead for the next scan.
         *
         * The read is unformatted (get), so whitespace is not skipped:
         * "= =" is not mistaken for "==" and a newline following the operator
         * is not silently discarded. At end of input the lookahead is reset to
         * a blank and false is returned, so a stale character is never
         * compared again.
         */
        bool Lexer::readnext(char c, std::ifstream &in) {
            if (!in.get(peek)) {
                peek = ' ';
                return false;
            }
            if (peek == c) {
                peek = ' ';
                return true;
            }
            // Fold whitespace other than '\n' (e.g. '\r', '\t') into a blank so
            // the caller's whitespace skipping handles it; '\n' is kept so the
            // line can still be counted.
            if (peek != '\n' && std::isspace(static_cast<unsigned char>(peek)))
                peek = ' ';
            return false;
        }
        
        
        /*
         * Returns the next token read from `in`.
         *
         * Whitespace is skipped first; every '\n' skipped increments the line
         * counter. The token is then classified as
         *   - an operator or delimiter (its tag from Tag),
         *   - an integer constant (Tag::NUM, lexeme without leading zeros),
         *   - a reserved word (the Word stored in the word table), or
         *   - an identifier (Tag::ID).
         * Word("error", Tag::ERROR) is returned for a lone '|' or '&', for a
         * number followed by '.', and for any other unrecognised character.
         * Word("empty", Tag::EMPTY) is returned once the input is exhausted.
         *
         * Characters are read unformatted (get), so whitespace reaches the
         * lexer and lines can be counted. Every path consumes at least one
         * character or reaches end of input, so repeated calls always make
         * progress.
         */
        Word Lexer::scan(std::ifstream &in) {
            // Loads the next raw character into the lookahead. At end of input
            // the lookahead becomes a blank, so a stale character is never
            // scanned twice.
            auto advance = [this, &in]() -> bool {
                if (in.get(peek))
                    return true;
                peek = ' ';
                return false;
            };
            // The <cctype> classifiers require a value representable as
            // unsigned char, hence the casts.
            auto isDigit = [](char ch) {
                return std::isdigit(static_cast<unsigned char>(ch)) != 0;
            };
            auto isIdentChar = [](char ch) {
                return std::isalnum(static_cast<unsigned char>(ch)) != 0 || ch == '_';
            };

            // Skip whitespace, counting lines.
            for (;;) {
                if (peek == '\n')
                    ++line;
                else if (!std::isspace(static_cast<unsigned char>(peek)))
                    break;
                if (!advance())
                    return Word("empty", Tag::EMPTY);
            }

            // Operators and delimiters.
            switch (peek) {
                case '!':
                    if (readnext('=', in))
                        return Word("!=", Tag::NE);
                    return Word("!", Tag::NOT);
                case '-':
                    if (readnext('-', in))
                        return Word("--", Tag::AUTOMINUS);
                    return Word("-", Tag::MINUS);
                case '+':
                    if (readnext('+', in))
                        return Word("++", Tag::AUTOADD);
                    return Word("+", Tag::ADD);
                case '|':
                    if (readnext('|', in))
                        return Word("||", Tag::OR);
                    return Word("error", Tag::ERROR);
                case '&':
                    if (readnext('&', in))
                        return Word("&&", Tag::AND);
                    return Word("error", Tag::ERROR);
                case '*':
                    advance();
                    return Word("*", Tag::MUTIPLY);
                case '/':
                    advance();
                    return Word("/", Tag::DIVIDE);
                case '%':
                    advance();
                    return Word("%", Tag::MOD);
                case '=':
                    if (readnext('=', in))
                        return Word("==", Tag::EQ);
                    return Word("=", Tag::ASSIN);
                case '>':
                    if (readnext('=', in))
                        return Word(">=", Tag::GE);
                    return Word(">", Tag::GT);
                case '<':
                    if (readnext('=', in))
                        return Word("<=", Tag::LE);
                    return Word("<", Tag::LS);
                case ',':
                    advance();
                    return Word(",", Tag::COMMA);
                case ';':
                    advance();
                    return Word(";", Tag::SEMICOLON);
                case '(':
                    advance();
                    return Word("(", Tag::LLBRACKET);
                case ')':
                    advance();
                    return Word(")", Tag::RLBRACKET);
                case '[':
                    advance();
                    return Word("[", Tag::LMBRACKET);
                case ']':
                    advance();
                    return Word("]", Tag::RMBRACKET);
                case '{':
                    advance();
                    return Word("{", Tag::LGBRACKET);
                case '}':
                    advance();
                    return Word("}", Tag::RGBRACKET);
            }

            // Integer constants.
            if (isDigit(peek)) {
                std::string digits;
                do {
                    digits += peek;
                } while (advance() && isDigit(peek));

                if (peek == '.') {
                    // Only integers are supported: swallow the '.' and any
                    // fraction digits so the literal is reported as one error.
                    while (advance() && isDigit(peek)) {
                    }
                    return Word("error", Tag::ERROR);
                }

                // Drop leading zeros ("007" -> "7") without converting to int,
                // so arbitrarily long literals cannot overflow.
                const std::string::size_type firstSignificant = digits.find_first_not_of('0');
                if (firstSignificant == std::string::npos)
                    digits = "0";
                else
                    digits.erase(0, firstSignificant);
                return Word(digits, Tag::NUM);
            }

            // Identifiers and reserved words.
            if (std::isalpha(static_cast<unsigned char>(peek))) {
                std::string name;
                do {
                    name += peek;
                } while (advance() && isIdentChar(peek));

                // A lexeme found in the word table is a reserved word.
                const auto reserved = words.find(name);
                if (reserved != words.end())
                    return reserved->second;
                return Word(name, Tag::ID);
            }

            // Unrecognised character: consume it so the next call moves on.
            advance();
            return Word("error", Tag::ERROR);
        }
    
    

      设计完成后,自己写一个main函数,在while循环中调用scan函数,每次打印出Word的内容,就能够得到词法分析的结果。

  • 相关阅读:
    立即执行函数
    函数 闭包
    函数 预编译
    函数
    函数
    变量作用域
    保留字
    JavaScript 中的 算术运算
    图片上传效果
    HTML标签,元素类型 概览
  • 原文地址:https://www.cnblogs.com/vachester/p/6884345.html
Copyright © 2011-2022 走看看