zoukankan      html  css  js  c++  java
  • A.3 词法分析器

    包 lexer 是词法分析器的代码的扩展。类 Tag 定义了各个词法单元对应的常量。

       1:  package lexer;
       /**
        * Integer tags that identify the kinds of tokens produced by the lexer.
        *
        * <p>All values are 256 or greater. Single-character tokens created
        * elsewhere in this package use the character code itself as their tag.
        * According to the accompanying text, INDEX, MINUS and TEMP are not
        * lexical tokens; they are used in the abstract syntax tree.
        */
       public class Tag {
           public static final int AND = 256;
           public static final int BASIC = 257;
           public static final int BREAK = 258;
           public static final int DO = 259;
           public static final int ELSE = 260;
           public static final int EQ = 261;
           public static final int FALSE = 262;
           public static final int GE = 263;
           public static final int ID = 264;
           public static final int IF = 265;
           public static final int INDEX = 266;
           public static final int LE = 267;
           public static final int MINUS = 268;
           public static final int NE = 269;
           public static final int NUM = 270;
           public static final int OR = 271;
           public static final int REAL = 272;
           public static final int TEMP = 273;
           public static final int TRUE = 274;
           public static final int WHILE = 275;
       }

    其中的三个常量 INDEX、MINUS 和 TEMP 不是词法单元,它们将在抽象语法树中使用。

    类 Token 和 Num 增加了方法 toString:
     
       1:  package lexer;
       2:  public class Token {
       3:      public final static int tag;
       4:   
       5:      public Token(int t){
       6:          tag=t;
       7:      }
       8:      
       9:      public String toString(){
      10:          return ""+(char)tag;
      11:      }
      12:  }
      13:   
      14:  package lexer;                             //文件 Num.java
      15:  public class Num extends Token{
      16:      public final value;
      17:   
      18:      public Num(int v){
      19:          super(Tag.NUM);
      20:          value=v;
      21:      }
      22:   
      23:      public String toString(){
      24:          return "" + value;
      25:      }
      26:  }

    类 Word 用于管理保留字、标识符和像 && 这样的复合词法单元的词素。它也可以用来管理中间代码中运算符的书写形式,比如单目减号。例如,源文件中的 -2 的中间形式是 minus 2。

       1:  package lexer;
       2:  public class Word extends Token{
       3:      public String lexeme = "";
       4:   
       5:      public Word(String s ,int tag){
       6:          super(tag);
       7:          lexeme = s;
       8:      }
       9:   
      10:      public String toString(){
      11:          return lexeme;
      12:      }
      13:   
      14:      public static final Word
      15:          and = new Word("&&",Tag.AND), or = new Word("||",Tag.OR),
      16:          eq = new Word("==",Tag.EQ), ne = new Word("!=",Tag.NE),
      17:          le = new Word("<=",Tag.LE), ge = new Word(">=",Tag.GE),
      18:          minus = new Word("minus",Tag.MINUS),
      19:          True = new Word("true",Tag.TRUE),
      20:          False = new Word("false",Tag.FAlSE),
      21:          temp = new Word("t",Tag.TEMP);
      22:  }

    类Real用于处理浮点数:

       1:  package lexer;
       2:  public class Real extends Token{
       3:      public final float value;
       4:   
       5:      public Real(float v){
       6:          super(Tag.REAL);
       7:          value = v;
       8:      }
       9:   
      10:      public String toString(){
      11:          return "" + value;
      12:      }
      13:  }

    类Lexer的主方法,即函数scan,识别数字、标识符和保留字。

    类 Lexer 的构造函数首先保留了关键字 if、else、while、do 和 break,随后保留了在其他地方定义的对象的词素。对象 Word.True 和 Word.False 在类 Word 中定义。对应于基本类型 int、char、bool 和 float 的对象在类 Type 中定义。类 Type 是 Word 的一个子类,来自包 symbols。

       1:  package lexer;                     //文件Lexer.java
       2:  import java.io.*;
       3:  import java.util.*;
       4:  import symbols.*;
       5:  public class Lexer{
       6:      public static int line = 1;
       7:      char peek = ' ';
       8:      Hashtable words = new Hashtalbe();
       9:   
      10:      void reverse(Word w){
      11:          words.put(w.lexeme,w);
      12:      }
      13:   
      /**
       * Creates a lexer and pre-loads the word table with the reserved words.
       */
      public Lexer() {
          // Keywords.
          reverse(new Word("if", Tag.IF));
          reverse(new Word("else", Tag.ELSE));
          reverse(new Word("while", Tag.WHILE));
          reverse(new Word("do", Tag.DO));
          reverse(new Word("break", Tag.BREAK));

          // Lexemes of objects defined in class Word.
          reverse(Word.True);
          reverse(Word.False);

          // The basic-type objects are defined in class Type (a subclass of
          // Word from package symbols), not in Word, which declares no such
          // members. NOTE(review): field names Int/Char/Bool/Float are taken
          // from the original calls; confirm against symbols.Type.
          reverse(Type.Int);
          reverse(Type.Char);
          reverse(Type.Bool);
          reverse(Type.Float);
      }

    函数 readch()(无参数的版本)用于把下一个输入字符读到变量 peek 中。名字 readch 被重载:带一个字符参数的版本用于帮助识别复合的词法单元。比如,一看到输入字符 <,调用 readch('=') 就会把下一个字符读入 peek,并检查它是否为 =。

       1:  void readch() throws IOException{
       2:      peek=(char) System.in.read();
       3:  }
       4:   
       5:  boolean readch(char c)throws IOException{
       6:      readch();
       7:      if(peek != c)
       8:          return false;
       9:      peek = ' ';
      10:      return true;
      11:  }

    函数 scan 首先略过所有的空白字符(for 循环),然后试图识别像 <= 这样的复合词法单元(switch 语句)和像 365 这样的数字。如果不成功,它就试图读入一个由字母和数字组成的字符串(标识符或保留字)。

       1:  public Token scan() throws IOException{
       2:      for( ; ; readch()){
       3:          if(peek == ' '|| peek == '	')
       4:              continue;
       5:          else if(peek == '
    ')
       6:              line=line+1;
       7:          else
       8:              break;
       9:   
      10:          switch(peek){
      11:              case '&':
      12:                  if(readch('&'))
      13:                      return Word.and;
      14:                  else
      15:                      return new Token('&');
      16:              case '|':
      17:                  if(readch('|'))
      18:                      return Word.or;
      19:                  else
      20:                      return new Token('|');
      21:              case '=':
      22:                  if(readch('='))
      23:                      return Word.eq;
      24:                  else
      25:                      return new Token('=');
      26:              case '!':
      27:                  if(readch('!'))
      28:                      return Word.ne;
      29:                  else
      30:                      return new Token('!');
      31:              case '<':
      32:                  if(readch('<'))
      33:                      return Word.le;
      34:                  else
      35:                      return new Token('<');
      36:              case '>':
      37:                  if(readch('>'))
      38:                      return Word.ge;
      39:                  else
      40:                      return new Token('>');        
      41:          }
      42:   
      43:          if(Character.isDigit(peek)){
      44:              int v=0;
      45:              do{
      46:                  v=10*v=Character.digit(peek,10);
      47:                  readch();
      48:              }while(Character.isDigit(peek));
      49:   
      50:              if(peek!='.')
      51:                  return new Num(v);
      52:              float x= v;
      53:              float d=10;
      54:              for( ; ; ){
      55:                  readch();
      56:                  if(! Character.isDigit(peek))
      57:                      break; 
      58:                  x=x+Character.digit(peek,10)/d;
      59:                  d=d*10;
      60:              }
      61:              return new Real(x);
      62:          }
      63:   
      64:          if(Character.isLetter(peek)){
      65:              StringBuffer b=new StringBuffer();
      66:              do{
      67:                  b.append(peek);
      68:                  readch();
      69:              }while(Character.isLetterOrDigit(peek));
      70:              String s=b.toString();
      71:              Word w=(Word)words.get(s);
      72:              if(w!=null)
      73:                  return w;
      74:              w = new Word(s,Tag.ID);
      75:              words.put(s,w);
      76:              return w;
      77:          }
      78:          Token tok=new Token(peek);
      79:          peek=' ';
      80:          return tok;
      81:      }
      82:  }
    最后,peek中的任意字符都被作为词法单元返回。
  • 相关阅读:
    494 Target Sum 目标和
    493 Reverse Pairs 翻转对
    492 Construct the Rectangle 构建矩形
    491 Increasing Subsequences 递增子序列
    488 Zuma Game 祖玛游戏
    486 Predict the Winner 预测赢家
    485 Max Consecutive Ones 最大连续1的个数
    483 Smallest Good Base
    Django Form组件
    Django Auth组件
  • 原文地址:https://www.cnblogs.com/ZJUT-jiangnan/p/3524637.html
Copyright © 2011-2022 走看看