参考:http://www.cnblogs.com/yanlingyin/archive/2012/04/17/2451717.html
实现了一个简单的java词法分析器
功能:词法分析下面一段java小程序
int sum = 0; for(int i = 1; i <= 100; i = i + 1) sum += i;#
1.程序片段中使用到的关键字、运算符和界符:
关键词:
int for
运算符:
+ = <=
界符:
( ) ; #
2.单词和单词种别码设计
单词符号 | 种别码
int | 1
for | 2
letter(letter|digit)* | 3
digit digit* | 4
* | 5
/ | 6
+ | 7
- | 8
> | 9
>= | 10
< | 11
<= | 12
= | 13
; | 14
( | 15
) | 16
# | 17
词法分析器源代码
import java.util.Scanner;

/**
 * A small hand-written lexical analyzer for a toy Java-like language.
 *
 * <p>The program text is stored in {@link #input}. Every call to {@link #scanner()} consumes one
 * token starting at {@link #point} and publishes the result through {@link #syn} (token code),
 * {@link #token} (lexeme) and, for integer literals, {@link #sum} (numeric value).
 *
 * <p>Token codes: keywords {@code int}=1 and {@code for}=2, identifier=3, integer literal=4,
 * {@code *}=5, {@code /}=6, {@code +}=7, {@code -}=8, {@code >}=9, {@code >=}=10, {@code <}=11,
 * {@code <=}=12, {@code =}=13, {@code ;}=14, {@code (}=15, {@code )}=16, {@code #}=17 (end
 * marker), and -1 for any character that is not part of the language.
 */
public class Lexical {
    private static final int SYN_UNKNOWN = -1;
    private static final int SYN_IDENTIFIER = 3;
    private static final int SYN_NUMBER = 4;
    private static final int SYN_MULTIPLY = 5;
    private static final int SYN_DIVIDE = 6;
    private static final int SYN_PLUS = 7;
    private static final int SYN_MINUS = 8;
    private static final int SYN_GREATER = 9;
    private static final int SYN_GREATER_EQUAL = 10;
    private static final int SYN_LESS = 11;
    private static final int SYN_LESS_EQUAL = 12;
    private static final int SYN_ASSIGN = 13;
    private static final int SYN_SEMICOLON = 14;
    private static final int SYN_LEFT_PAREN = 15;
    private static final int SYN_RIGHT_PAREN = 16;
    private static final int SYN_END = 17;

    /** Source program text to be tokenized. */
    String input = "";
    /** Reserved words; the token code of a keyword is its index in this array plus one. */
    String[] keyWords = {"int", "for"};
    /** Cursor into {@link #input}: index of the next character that has not been consumed. */
    int point = 0;
    /** Token code of the most recently scanned token. */
    int syn = 0;
    /** Numeric value of the most recently scanned integer literal. */
    int sum = 0;
    /** Lexeme of the most recently scanned token (left empty for integer literals). */
    StringBuffer token = new StringBuffer("");

    /**
     * Reads one line of source text from standard input and prints every token as
     * {@code (code,value)}, one per line, up to and including the end marker.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Lexical lexical = new Lexical();

        try (Scanner in = new Scanner(System.in)) {
            // Empty standard input is treated as an empty program instead of failing.
            lexical.input = in.hasNextLine() ? in.nextLine() : "";
        }

        do {
            lexical.scanner();
            if (lexical.syn == SYN_NUMBER) {
                // Integer literals are reported by value rather than by lexeme.
                System.out.println("(" + lexical.syn + "," + lexical.sum + ")");
            } else {
                System.out.println("(" + lexical.syn + "," + lexical.token + ")");
            }
        } while (lexical.syn != SYN_END);
    }

    /**
     * Scans the next token of {@link #input}, starting at {@link #point}.
     *
     * <p>Leading whitespace is skipped. Reaching the end of the input is reported as the end
     * marker (code 17, lexeme {@code #}), so a program without a trailing {@code #} still
     * terminates. A character outside the language yields code -1 with that character as the
     * lexeme, and the cursor always advances past it.
     */
    public void scanner() {
        token = new StringBuffer();
        skipWhitespace();

        if (point >= input.length()) {
            // Implicit end marker: lets callers that loop until code 17 stop cleanly.
            syn = SYN_END;
            token.append('#');
            return;
        }

        char current = input.charAt(point);
        if (isLetter(current)) {
            scanWord();
        } else if (isDigit(current)) {
            scanNumber();
        } else {
            scanSymbol(current);
        }
    }

    /** Advances the cursor past any run of whitespace characters. */
    private void skipWhitespace() {
        while (point < input.length() && Character.isWhitespace(input.charAt(point))) {
            point++;
        }
    }

    /** Scans a keyword or identifier: a letter followed by any number of letters or digits. */
    private void scanWord() {
        while (point < input.length()
                && (isLetter(input.charAt(point)) || isDigit(input.charAt(point)))) {
            token.append(input.charAt(point));
            point++;
        }

        syn = SYN_IDENTIFIER;
        String word = token.toString();
        for (int i = 0; i < keyWords.length; i++) {
            if (keyWords[i].equals(word)) {
                syn = i + 1; // keyword codes are 1-based positions in keyWords
                break;
            }
        }
    }

    /**
     * Scans a run of decimal digits and stores its value in {@link #sum}.
     *
     * <p>The value is accumulated with plain {@code int} arithmetic, so a literal beyond the
     * {@code int} range wraps around.
     */
    private void scanNumber() {
        syn = SYN_NUMBER;
        sum = 0;
        while (point < input.length() && isDigit(input.charAt(point))) {
            sum = sum * 10 + (input.charAt(point) - '0');
            point++;
        }
    }

    /**
     * Scans an operator or delimiter that starts with {@code symbol}; anything unrecognized is
     * consumed as a single-character error token so the cursor never stalls.
     */
    private void scanSymbol(char symbol) {
        token.append(symbol);
        point++;

        switch (symbol) {
            case '>':
                syn = acceptEquals() ? SYN_GREATER_EQUAL : SYN_GREATER;
                break;
            case '<':
                syn = acceptEquals() ? SYN_LESS_EQUAL : SYN_LESS;
                break;
            case '*':
                syn = SYN_MULTIPLY;
                break;
            case '/':
                syn = SYN_DIVIDE;
                break;
            case '+':
                syn = SYN_PLUS;
                break;
            case '-':
                syn = SYN_MINUS;
                break;
            case '=':
                syn = SYN_ASSIGN;
                break;
            case ';':
                syn = SYN_SEMICOLON;
                break;
            case '(':
                syn = SYN_LEFT_PAREN;
                break;
            case ')':
                syn = SYN_RIGHT_PAREN;
                break;
            case '#':
                syn = SYN_END;
                break;
            default:
                syn = SYN_UNKNOWN;
                break;
        }
    }

    /** Consumes a following {@code =} if there is one and reports whether it did. */
    private boolean acceptEquals() {
        if (point < input.length() && input.charAt(point) == '=') {
            token.append('=');
            point++;
            return true;
        }
        return false;
    }

    private static boolean isLetter(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
    }

    private static boolean isDigit(char c) {
        return c >= '0' && c <= '9';
    }
}
注意程序片段要以#号结束
其实,上面参考的博客写得还不错可以看看