包 lexer 是词法分析器的代码的扩展。类 Tag 定义了各个词法单元对应的常量。
package lexer;

/**
 * Integer constants identifying the kinds of tokens.
 *
 * <p>The values start at 256; {@code Token.toString()} in this package casts a
 * tag to {@code char}, so single-character tokens use their character code as
 * the tag.
 *
 * <p>Three of these constants, {@code INDEX}, {@code MINUS} and {@code TEMP},
 * are not lexical tokens; they are used in abstract syntax trees.
 */
public class Tag {
    public final static int
        AND   = 256, BASIC = 257, BREAK = 258, DO   = 259, ELSE  = 260,
        EQ    = 261, FALSE = 262, GE    = 263, ID   = 264, IF    = 265,
        INDEX = 266, LE    = 267, MINUS = 268, NE   = 269, NUM   = 270,
        OR    = 271, REAL  = 272, TEMP  = 273, TRUE = 274, WHILE = 275;
}
类Token和Num增加了方法toString;
1: package lexer;2: public class Token {
3: public final static int tag;
4: 5: public Token(int t){
6: tag=t; 7: } 8: 9: public String toString(){
10: return ""+(char)tag;
11: } 12: } 13: 14: package lexer; //文件 Num.java15: public class Num extends Token{
16: public final value;
17: 18: public Num(int v){
19: super(Tag.NUM);20: value=v;
21: } 22: 23: public String toString(){
24: return "" + value;
25: } 26: }类Word用于管理保留字、标识符和像 && 这样的复合词法单元的词素。它也可以用来管理在中间代码中运算符的书写符号形式;比如单目减号。例如,原文件中的 –2 的中间形式是minus 2.
1: package lexer;2: public class Word extends Token{
3: public String lexeme = "";
4: 5: public Word(String s ,int tag){
6: super(tag); 7: lexeme = s; 8: } 9: 10: public String toString(){
11: return lexeme;
12: } 13: 14: public static final Word
15: and = new Word("&&",Tag.AND), or = new Word("||",Tag.OR),
16: eq = new Word("==",Tag.EQ), ne = new Word("!=",Tag.NE),
17: le = new Word("<=",Tag.LE), ge = new Word(">=",Tag.GE),
18: minus = new Word("minus",Tag.MINUS),
19: True = new Word("true",Tag.TRUE),
20: False = new Word("false",Tag.FAlSE),
21: temp = new Word("t",Tag.TEMP);
22: }类Real用于处理浮点数:
1: package lexer;2: public class Real extends Token{
3: public final float value;
4: 5: public Real(float v){
6: super(Tag.REAL);7: value = v;
8: } 9: 10: public String toString(){
11: return "" + value;
12: } 13: }类Lexer的主方法,即函数scan,识别数字、标识符和保留字。
类Lexer的构造函数首先保留了关键字 if、else、while、do 和 break,然后保留了在其他地方定义的对象的词素。对象 Word.True 和 Word.False 在类 Word 中定义。对应于基本类型 int、char、bool 和 float 的对象在类 Type 中定义。类 Type 是 Word 的一个子类,来自包 symbols。
package lexer; // file Lexer.java

import java.io.*;
import java.util.*;
import symbols.*;

/**
 * Lexical analyzer that reads characters from {@code System.in} and groups
 * them into tokens. Its main method, {@link #scan()}, recognizes numbers,
 * identifiers and reserved words.
 */
public class Lexer {
    /** Current input line number; starts at 1 and is incremented by scan() on each newline. */
    public static int line = 1;

    /**
     * Lookahead character. It is reset to a blank once a token has been fully
     * consumed, so the next scan() call skips it as whitespace.
     */
    char peek = ' ';

    /** Maps a lexeme to its Word: the reserved words plus every identifier seen so far. */
    Hashtable<String, Word> words = new Hashtable<String, Word>();

    /**
     * Enters a word into the table under its lexeme, so that scan() returns
     * this object instead of creating an identifier for that lexeme.
     *
     * @param w the word to reserve
     */
    void reserve(Word w) {
        words.put(w.lexeme, w);
    }

    /**
     * Creates a lexer with the keywords reserved, followed by the lexemes of
     * objects defined elsewhere: Word.True and Word.False from class Word, and
     * the basic-type objects from class Type (package symbols).
     */
    public Lexer() {
        reserve(new Word("if", Tag.IF));
        reserve(new Word("else", Tag.ELSE));
        reserve(new Word("while", Tag.WHILE));
        reserve(new Word("do", Tag.DO));
        reserve(new Word("break", Tag.BREAK));
        reserve(Word.True);
        reserve(Word.False);
        reserve(Type.Int);
        reserve(Type.Char);
        reserve(Type.Bool);
        reserve(Type.Float);
    }

    /**
     * Reads the next input character into {@code peek}.
     *
     * <p>NOTE(review): at end of input {@code System.in.read()} returns -1,
     * which the cast turns into '\uffff'; nothing here treats that specially.
     *
     * @throws IOException if reading standard input fails
     */
    void readch() throws IOException {
        peek = (char) System.in.read();
    }

    /**
     * Overload of readch that helps recognize composite tokens: it reads the
     * next character into {@code peek} and checks whether it is {@code c}.
     * For example, after seeing {@code <}, the call {@code readch('=')} reads
     * the next character and tests whether it is {@code =}.
     *
     * @param c the character expected next
     * @return true if the character read equals {@code c}, in which case
     *         {@code peek} is reset to a blank; false otherwise, leaving the
     *         character in {@code peek} for the next token
     * @throws IOException if reading standard input fails
     */
    boolean readch(char c) throws IOException {
        readch();
        if (peek != c) {
            return false;
        }
        peek = ' ';
        return true;
    }

    /**
     * Returns the next token from the input. It first skips all whitespace,
     * then tries to recognize composite tokens such as {@code <=} and numbers
     * such as 365. If neither applies, it tries to read a word (an identifier
     * or reserved word); any other character is returned as a token of its own.
     *
     * @return the next token
     * @throws IOException if reading standard input fails
     */
    public Token scan() throws IOException {
        // Skip blanks and tabs; count newlines.
        for ( ; ; readch()) {
            if (peek == ' ' || peek == '\t') {
                continue;
            } else if (peek == '\n') {
                line = line + 1;
            } else {
                break;
            }
        }

        // Composite operators. When the second character does not match, it
        // stays in peek and the first character is returned on its own.
        switch (peek) {
            case '&':
                if (readch('&')) {
                    return Word.and;
                } else {
                    return new Token('&');
                }
            case '|':
                if (readch('|')) {
                    return Word.or;
                } else {
                    return new Token('|');
                }
            case '=':
                if (readch('=')) {
                    return Word.eq;
                } else {
                    return new Token('=');
                }
            case '!':
                if (readch('=')) {
                    return Word.ne;
                } else {
                    return new Token('!');
                }
            case '<':
                if (readch('=')) {
                    return Word.le;
                } else {
                    return new Token('<');
                }
            case '>':
                if (readch('=')) {
                    return Word.ge;
                } else {
                    return new Token('>');
                }
        }

        // Numbers: an integer part, optionally followed by '.' and a fraction.
        if (Character.isDigit(peek)) {
            int v = 0;
            do {
                v = 10 * v + Character.digit(peek, 10);
                readch();
            } while (Character.isDigit(peek));

            if (peek != '.') {
                return new Num(v);
            }
            float x = v;
            float d = 10;
            for ( ; ; ) {
                readch();
                if (!Character.isDigit(peek)) {
                    break;
                }
                x = x + Character.digit(peek, 10) / d;
                d = d * 10;
            }
            return new Real(x);
        }

        // Words: a letter followed by letters or digits.
        if (Character.isLetter(peek)) {
            StringBuffer b = new StringBuffer();
            do {
                b.append(peek);
                readch();
            } while (Character.isLetterOrDigit(peek));
            String s = b.toString();
            Word w = words.get(s);
            if (w != null) {
                return w;
            }
            // First occurrence of this identifier: record it so later
            // occurrences return the same Word object.
            w = new Word(s, Tag.ID);
            words.put(s, w);
            return w;
        }

        // Any other character is a token by itself.
        Token tok = new Token(peek);
        peek = ' ';
        return tok;
    }
}