zoukankan      html  css  js  c++  java
  • 简单词法分析器实现

    编写分析器有两种方法,一种是通过DFA对单词进行识别,二是通过直接编敲代码进行识别。

    本程序採用DFA对单词进行识别。
    DFA的实现方法。大概思想和书上一致,在程序中,则是用二维数组代表状态转换矩阵,用一维数组表示终态。


    一个词法编辑要实现的功能主要包含下面几点:

    可以识别标识符、keyword、数字和运算符,对凝视进行过滤。同一时候还能识别出程序错误。

    使用说明:

    本程序的输入由当前文件夹下的in.txt文件读取输入,输出为一系列二元式
    #include<stdio.h>
    #include<string.h>
    #include<stdlib.h>
    //用指针而不是二维数组表示。这样就不用指定字符串长度,仅仅是不能改动指针所指向字符串的内容
    char *key[]={"int","char","float","double","void","const","for","if","else","then","while","switch","break","main","return"};
    char buffer[20];//存储当前识别出的单词
    char *identifier[50];
    char *num[50];
    int ident_num;//标志符数
    int number;//数字数
    int judgement_num(char c) {
    	if (c >= '0' && c <= '9') return 0;
    	else if (c == '.') return 1;
    	return -1;
    }
    int judge(char c){
    	if(c=='_') return 2;
    	else if(c>='0'&&c<='9') return 1;
    	else if(c>='a'&&c<='z'||c>='A'&&c<='Z') return 0;
    	return -1;
    }
    //next_i,next_j分别代表着状态转换表move的当前状态与接收到的字符
    //width代表每一状态可能遇到的状态数,length代表终态集大小
    int DFA(int begin,int move[],int width,int final[],int length,int(*judge)(char)){
        int len=0,next_i,next_j,tmp;
        char next_char;
        memset(buffer,0,sizeof(buffer));
        next_char=getchar();
        next_i=begin;
        while((next_j=judge(next_char))!=-1){
    		tmp=next_i;
    		next_i=move[next_i*width+next_j];
    		if(next_i==-1){
                printf("move[%d][%d]has not next state
    ",tmp,next_j);
                return -1;
    		}
    		buffer[len++]=next_char;
            next_char=getchar();
    
        }
        ungetc(next_char,stdin);
        buffer[len]='';
        for(int i=0;i<length;i++){
    		if(next_i==final[i])  return 0;
        }
        return -1;
    }
    
    void is_comment(){
        char next_char;
        char tmp[5];
        memset(tmp,0,sizeof(tmp));
        tmp[0]=getchar();
        next_char=getchar();
        if(next_char=='*'){
    		while((next_char=getchar())!=EOF){
                if(next_char=='*'&&getchar()=='/') return;
    		}
    		printf("The comment is error
    ");
        }
        else if(next_char=='/'){
            while((next_char=getchar())!='
    ');
        }
        else if(next_char=='='){
            printf("(/=,_)
    ");
        }
        else{
    		printf("(/,_)
    ");
    		ungetc(next_char,stdin);
        }
    }
    void is_other(){
        char next_char,c=getchar();
        if(c=='+'||c=='-'||c=='*'||c=='%'){
            next_char=getchar();
            if(next_char=='=') printf("(%c=,_)
    ",c);
            else {
    			printf("(%c,_)
    ",c);
    			ungetc(next_char,stdin);
            }
        }
        else if(c=='>'||c=='<'){
    		next_char=getchar();
            if(next_char=='=') printf("(rlop,%c=)
    ",c);
            else {
    			printf("(rlop,%c)
    ",c);
    			ungetc(next_char,stdin);
            }
        }
        else if(c=='!'){
    		next_char=getchar();
            if(next_char=='=') printf("(!=,_)
    ");
            else {
    			printf("(not,_)
    ");
    			ungetc(next_char,stdin);
            }
        }
        else if(c=='|'){
    		next_char=getchar();
            if (next_char == '|')
    			printf("(or,||)
    ");
    		else {
    			ungetc(next_char, stdin);
    		}
        }
        else if(c=='&'){
    		next_char=getchar();
            if (next_char == '&')
    			printf("(and,&&)
    ");
    		else {
    			ungetc(next_char, stdin);
    		}
        }
        else if(c=='='||c=='('||c==')'||c=='['||c==']'||c==';'||c==','||c=='{'||c=='}'){
            printf("(%c,_)
    ",c);
        }
    }
    void is_digit(){
        int begin=0;
        int move[]={1,-1,1,2,2,-1};
    	int final[]={1,2};
    	int result=-1;
    	result=DFA(begin,move,2,final,2,judgement_num);
    	if(result==-1){
    		printf("digit DFA error
    ");
    		exit(-1);
    	}
    	else if(result==0){
            //printf("%s
    ",buffer);
            for(int i=0;i<number;i++){
    			if(strcmp(buffer,num[i])==0){
                    printf("(num,%s)
    ",buffer);
    				return;
    			}
            }
            num[number]=(char*)malloc(sizeof(buffer));
            strcpy(num[number++],buffer);
            printf("(num,%s)
    ",buffer);
            return;
    	}
    }
    void is_letter(){
    	int i;
    	int begin=0;
    	int move[] ={1,-1,1,1,1,1};
    	int final[]={1,2};
    	int result=-1;
    	result=DFA(begin,move,3,final,1,judge);
        if(result==-1){
    		printf("letter DFA error
    ");
    		exit(-1);
        }
        else if(result==0){
    		int len=sizeof(key)/sizeof(char *);
    		//如为keyword
            for(i=0;i<len;i++){
    			if(strcmp(buffer,key[i])==0){
                    printf("(key,%s)
    ",key[i]);
                    return;
    			}
            }
            //如为标志符
            for(i=0;i<ident_num;i++){
    			//在当前标志符表中已经存在
    			if(strcmp(buffer,identifier[i])==0){
    				printf("(%id,%s)
    ",identifier[i]);
    				return;
    			}
            }
            //如不存在,则写入
            identifier[ident_num]=(char*)malloc(sizeof(buffer));
            strcmp(identifier[ident_num++],buffer);
            printf("(id,%s)
    ",buffer);
            return;
        }
    }
    
    void init() {
    	ident_num=0;
        number=0;
    }
    void work() {
    	char c;
        while((c=getchar())!=EOF){
    		ungetc(c,stdin);
    		if(judge(c)==2||judge(c)==0) is_letter();
    		else if(judge(c)==1) is_digit();
    		else if(c=='/') is_comment();
    		else is_other();
        }
    }
    
    int main() {
        freopen("in.txt","r",stdin);
        init();
        work();
        return 0;
    }
    


  • 相关阅读:
    Median Value
    237. Delete Node in a Linked List
    206. Reverse Linked List
    160. Intersection of Two Linked Lists
    83. Remove Duplicates from Sorted List
    21. Merge Two Sorted Lists
    477. Total Hamming Distance
    421. Maximum XOR of Two Numbers in an Array
    397. Integer Replacement
    318. Maximum Product of Word Lengths
  • 原文地址:https://www.cnblogs.com/wzzkaifa/p/7039331.html
Copyright © 2011-2022 走看看