zoukankan      html  css  js  c++  java
  • A.3 词法分析器

    包 lexer 是词法分析器的代码的扩展。类 Tag 定义了各个词法单元对应的常量。

       1:  package lexer;
       /**
        * Integer tags that identify the kinds of tokens handled by the lexer.
        *
        * <p>All values are 256 or greater, i.e. above the 8-bit character range;
        * single-character tokens use their character code as the tag instead.
        * {@code INDEX}, {@code MINUS} and {@code TEMP} are not produced by the lexer;
        * they are reserved for use in the abstract syntax tree.
        *
        * <p>This class only holds constants and cannot be instantiated.
        */
       public final class Tag {
           public static final int
               AND = 256, BASIC = 257, BREAK = 258, DO = 259, ELSE = 260,
               EQ = 261, FALSE = 262, GE = 263, ID = 264, IF = 265,
               INDEX = 266, LE = 267, MINUS = 268, NE = 269, NUM = 270,
               OR = 271, REAL = 272, TEMP = 273, TRUE = 274, WHILE = 275;

           /** Not instantiable: this class is a pure constants holder. */
           private Tag() {
           }
       }

    其中的三个常量 INDEX、MINUS 和 TEMP 不是词法单元,它们将在抽象语法树中使用。

    类Token和Num增加了方法toString:
     
       1:  package lexer;
       /**
        * A lexical token identified by an integer tag.
        *
        * <p>For single-character tokens the tag is the character code itself, which
        * is why {@link #toString()} renders the tag as a character.
        */
       public class Token {
           /** Tag of this token; set once per instance at construction time. */
           public final int tag;

           /**
            * Creates a token with the given tag.
            *
            * @param t the tag identifying this token
            */
           public Token(int t) {
               tag = t;
           }

           /**
            * Returns the tag interpreted as a single character.
            *
            * @return a one-character string whose character has code {@code tag}
            */
           @Override
           public String toString() {
               return String.valueOf((char) tag);
           }
       }
      13:   
      14:  package lexer;                             //文件 Num.java
      15:  public class Num extends Token{
      16:      public final value;
      17:   
      18:      public Num(int v){
      19:          super(Tag.NUM);
      20:          value=v;
      21:      }
      22:   
      23:      public String toString(){
      24:          return "" + value;
      25:      }
      26:  }

    类Word用于管理保留字、标识符和像 && 这样的复合词法单元的词素。它也可以用来管理在中间代码中运算符的书写符号形式,比如单目减号。例如,源文件中的 -2 的中间形式是 minus 2。

       1:  package lexer;
       2:  public class Word extends Token{
       3:      public String lexeme = "";
       4:   
       5:      public Word(String s ,int tag){
       6:          super(tag);
       7:          lexeme = s;
       8:      }
       9:   
      10:      public String toString(){
      11:          return lexeme;
      12:      }
      13:   
      14:      public static final Word
      15:          and = new Word("&&",Tag.AND), or = new Word("||",Tag.OR),
      16:          eq = new Word("==",Tag.EQ), ne = new Word("!=",Tag.NE),
      17:          le = new Word("<=",Tag.LE), ge = new Word(">=",Tag.GE),
      18:          minus = new Word("minus",Tag.MINUS),
      19:          True = new Word("true",Tag.TRUE),
      20:          False = new Word("false",Tag.FAlSE),
      21:          temp = new Word("t",Tag.TEMP);
      22:  }

    类Real用于处理浮点数:

       1:  package lexer;
       2:  public class Real extends Token{
       3:      public final float value;
       4:   
       5:      public Real(float v){
       6:          super(Tag.REAL);
       7:          value = v;
       8:      }
       9:   
      10:      public String toString(){
      11:          return "" + value;
      12:      }
      13:  }

    类Lexer的主方法,即函数scan,识别数字、标识符和保留字。

    类Lexer中的第9~13行保留了关键字。第14~16行保留了在其他地方定义的对象的词素。对象Word.True和Word.False在类Word中定义。对应于基本类型 int、char、bool 和 float 的对象在类 Type 中定义。类 Type 是 Word 的一个子类。类 Type 来自包 symbols。

       1:  package lexer;                     //文件Lexer.java
       2:  import java.io.*;
       3:  import java.util.*;
       4:  import symbols.*;
       5:  public class Lexer{
       /** Current input line number; scan() increments it for every newline it skips. */
       public static int line = 1;

       /** Lookahead character; a blank means no significant character is pending. */
       char peek = ' ';

       /** Maps each known lexeme to its Word: reserved words plus identifiers seen so far. */
       Hashtable<String, Word> words = new Hashtable<String, Word>();

       /**
        * Registers a word in the lexeme table so that scan() returns this exact
        * object whenever its lexeme is read.
        *
        * @param w the word to reserve, keyed by its lexeme
        */
       void reserve(Word w) {
           words.put(w.lexeme, w);
       }

       /**
        * Creates a lexer with the keywords, the boolean literals and the basic
        * type names already reserved.
        */
       public Lexer() {
           // Keywords of the source language.
           reserve(new Word("if", Tag.IF));
           reserve(new Word("else", Tag.ELSE));
           reserve(new Word("while", Tag.WHILE));
           reserve(new Word("do", Tag.DO));
           reserve(new Word("break", Tag.BREAK));

           // Words whose objects are defined elsewhere: boolean literals in Word,
           // basic types in symbols.Type (a subclass of Word).
           reserve(Word.True);
           reserve(Word.False);
           reserve(Type.Int);
           reserve(Type.Char);
           reserve(Type.Bool);
           reserve(Type.Float);
       }

    函数readch()(第18行)用于把下一个输入字符读到变量peek中。名字readch被重载(第19~24行),以便帮助识别复合的词法单元。比如,一看到输入字符<,调用readch('=')就会把下一个字符读入peek,并检查它是否为=。

       1:  void readch() throws IOException{
       2:      peek=(char) System.in.read();
       3:  }
       4:   
       5:  boolean readch(char c)throws IOException{
       6:      readch();
       7:      if(peek != c)
       8:          return false;
       9:      peek = ' ';
      10:      return true;
      11:  }

    函数scan首先略过所有的空白字符(第26~30行)。接着它试图识别像<=这样的复合词法单元(第31~34行)和像365这样的数字(第45~58行)。如果不成功,它就试图读入一个字符串(第59~70行)。

       1:  public Token scan() throws IOException{
       2:      for( ; ; readch()){
       3:          if(peek == ' '|| peek == '	')
       4:              continue;
       5:          else if(peek == '
    ')
       6:              line=line+1;
       7:          else
       8:              break;
       9:   
      10:          switch(peek){
      11:              case '&':
      12:                  if(readch('&'))
      13:                      return Word.and;
      14:                  else
      15:                      return new Token('&');
      16:              case '|':
      17:                  if(readch('|'))
      18:                      return Word.or;
      19:                  else
      20:                      return new Token('|');
      21:              case '=':
      22:                  if(readch('='))
      23:                      return Word.eq;
      24:                  else
      25:                      return new Token('=');
      26:              case '!':
      27:                  if(readch('!'))
      28:                      return Word.ne;
      29:                  else
      30:                      return new Token('!');
      31:              case '<':
      32:                  if(readch('<'))
      33:                      return Word.le;
      34:                  else
      35:                      return new Token('<');
      36:              case '>':
      37:                  if(readch('>'))
      38:                      return Word.ge;
      39:                  else
      40:                      return new Token('>');        
      41:          }
      42:   
      43:          if(Character.isDigit(peek)){
      44:              int v=0;
      45:              do{
      46:                  v=10*v=Character.digit(peek,10);
      47:                  readch();
      48:              }while(Character.isDigit(peek));
      49:   
      50:              if(peek!='.')
      51:                  return new Num(v);
      52:              float x= v;
      53:              float d=10;
      54:              for( ; ; ){
      55:                  readch();
      56:                  if(! Character.isDigit(peek))
      57:                      break; 
      58:                  x=x+Character.digit(peek,10)/d;
      59:                  d=d*10;
      60:              }
      61:              return new Real(x);
      62:          }
      63:   
      64:          if(Character.isLetter(peek)){
      65:              StringBuffer b=new StringBuffer();
      66:              do{
      67:                  b.append(peek);
      68:                  readch();
      69:              }while(Character.isLetterOrDigit(peek));
      70:              String s=b.toString();
      71:              Word w=(Word)words.get(s);
      72:              if(w!=null)
      73:                  return w;
      74:              w = new Word(s,Tag.ID);
      75:              words.put(s,w);
      76:              return w;
      77:          }
      78:          Token tok=new Token(peek);
      79:          peek=' ';
      80:          return tok;
      81:      }
      82:  }
    最后,peek中的任意字符都被作为词法单元返回。
  • 相关阅读:
    命令行获取当前日期及时间
    Nginx配置性能优化
    一些查看网络连接的命令
    Python 3.5源码编译安装
    Node.js 安装配置
    NFS服务器配置文档
    Linux服务器SSH免密互访
    LVM逻辑卷管理命令
    Zabbix客户端安装
    CentOS 7网卡网桥、绑定设置
  • 原文地址:https://www.cnblogs.com/ZJUT-jiangnan/p/3524637.html
Copyright © 2011-2022 走看看