zoukankan      html  css  js  c++  java
  • 使用flex和bison实现的sql引擎解析

    因为老师要求,近期在做 OceanBase 存储过程的实现;OceanBase 0.4 原本是不支持存储过程的。实现主要包含以下几个步骤:

    1、语法解析

    2、词法解析

    3、具体执行语法树的步骤

    现在先来说说词法和语法解析吧。这一块主要使用 flex(词法分析器生成工具)和 bison(语法分析器生成工具),这两个工具负责对用户输入的存储过程语句进行解析。

    下面具体说说该怎么实现对 SQL 语句的分析。

    1、首先建立一个lex的文件

    %option noyywrap nodefault yylineno case-insensitive
    
    %{
    
    /* Token codes and yylval used by the rules below; presumably the header
       written by "bison -d" for the grammar file -- confirm the file name. */
    #include "prosql.tab.hpp"
    #include <stdarg.h>
    #include <string.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <malloc.h>
    //YYSTYPE yylval;
    /* Start condition that was active when a C-style comment opened; it is
       restored when the comment closes. */
    int oldstate;
    extern "C" int yylex();
    //extern "C" int yyparse();
    extern "C" void yyerror(const char *s, ...);
    /* Text to scan and the routine that feeds it to the scanner; both are
       defined in the grammar file. */
    extern char globalInputText[10000];
    extern int readInputForLexer( char *buffer, int *numBytesRead, int maxBytesToRead );
    /* Replace the scanner's default input macro so that characters are pulled
       through readInputForLexer(). */
    #undef YY_INPUT
    #define YY_INPUT(b,r,s) readInputForLexer(b,&r,s)
    %}
    
    %x COMMENT
    
    %%

        /* procedure header keywords */
    CREATE      { return CREATE; }
    PROCEDURE   { return PROCEDURE; }
    SQL         { return SQL; }

        /* block structure; the BEGIN keyword is reported as token BEGINT */
    DECLARE     { return DECLARE; }
    SET         { return SET; }
    BEGIN       { return BEGINT; }
    END         { return END; }

        /* data type names */
    INT         { return INT; }
    VARCHAR     { return VARCHAR; }
    DATE        { return DATE; }
    TIME        { return TIME; }
    DOUBLE      { return DOUBLE; }

        /* control flow keywords */
    IF          { return IF; }
    THEN        { return THEN; }
    ELSE        { return ELSE; }
    ENDIF       { return ENDIF; }
    FOR         { return FOR; }
    WHEN        { return WHEN; }
    WHILE       { return WHILE; }
    
    
    [0-9]+	{ yylval.strval = strdup(yytext);/*printf("number=%s
    ",yylval.strval);*/ return INTNUM; }/*number*/
    
    [0-9]+"."[0-9]* |
    "."[0-9]+	|
    [0-9]+E[-+]?[0-9]+	|
    [0-9]+"."[0-9]*E[-+]?[0-9]+ |
    "."[0-9]*E[-+]?[0-9]+	{ yylval.strval = strdup(yytext);/*printf("float=%s
    ",yylval.strval);*/ return APPROXNUM; }/*double*/
    
    TRUE	{ yylval.strval = "1";/*printf("bool=%s
    ",yylval.strval);*/ return BOOL; }/*bool*/
    
    FALSE	{ yylval.strval = "0";/*printf("bool=%s
    ",yylval.strval);*/ return BOOL; }/*bool*/
    
    '(\.|''|[^'
    ])*'	|
    "(\.|""|[^"
    ])*"  {
    				char *temp = strdup(yytext); 
    				yylval.strval = strdup(yytext);
    
    				//GetCorrectString(yylval.strval, temp);
    				
    				/*printf("string=%s
    ",yylval.strval);*/
    				return STRING;
    			}/*string*/
    '(\.|[^'
    ])*$		{ yyerror("Unterminated string %s", yytext); }
    "(\.|[^"
    ])*$		{ yyerror("Unterminated string %s", yytext); }
    
    
    X'[0-9A-F]+' |  
    0X[0-9A-F]+  	{ yylval.strval = strdup(yytext); return STRING; }
    
    
    0B[01]+      |
    B'[01]+'     { yylval.strval = strdup(yytext); return STRING; }
    
    
    [-+&~|^/%*(),.;!]   { return yytext[0]; }
    
    "&&"	{ return ANDOP; }
    "||"	{ return OR; }
    
    "<"	{ yylval.subtok = 1; return COMPARISON; }
    ">"	{ yylval.subtok = 2; return COMPARISON; }
    "!="	|
    "<>"	{ yylval.subtok = 3; return COMPARISON; }
    "="	{ yylval.subtok = 4; return COMPARISON; }
    "<="	{ yylval.subtok = 5; return COMPARISON; }
    ">="	{ yylval.subtok = 6; return COMPARISON; }
    "<=>"	{ yylval.subtok = 12; return COMPARISON; }
    
    "<<"	{ yylval.subtok = 1; return SHIFT; }
    ">>"	{ yylval.subtok = 2; return SHIFT; }
    
    
    [A-Za-z][A-Za-z0-9_]*	{ yylval.strval = strdup(yytext);
    			  /*printf("name 1=%s
    ",yylval.strval);*/
                              return NAME; }
    `[^`/\.
    ]+`           { yylval.strval = strdup(yytext+1);
    			  /*printf("name 2=%s
    ",yylval.strval);*/
                              yylval.strval[yyleng-2] = 0;
                              return NAME; }
    
    `[^`
    ]*$               { yyerror("unterminated quoted name %s", yytext); }
    
    
    @[0-9a-z_.$]+ |
    @"[^"
    ]+" |
    @`[^`
    ]+` |
    @'[^'
    ]+' { yylval.strval = strdup(yytext+1);  return USERVAR; }
    
    @"[^"
    ]*$ { yyerror("unterminated quoted user variable %s", yytext); }
    @`[^`
    ]*$ { yyerror("unterminated quoted user variable %s", yytext); }
    @'[^'
    ]*$ { yyerror("unterminated quoted user variable %s", yytext); }
    
    ":="     { return ASSIGN; }
    
    #.*		;
    "--"[ 	].*	;
    
    "/*"            { oldstate = YY_START; BEGIN COMMENT; }
    <COMMENT>"*/"   { BEGIN oldstate; }
    <COMMENT>.|
       ;
    <COMMENT><<EOF>> { yyerror("unclosed comment"); }
    
    
    [ 	
    ]         /* white space */
    .               { yyerror("mystery character '%c'", *yytext); }
    
    %%
    
    


    这一部分就是用我们自己定义的正则表达式去识别输入中的每一个词(token)。

    接下来是对识别出来的词进行语法分析的 bison 文件

    %{
    #include <stdlib.h>
    #include <stdarg.h>
    #include <string.h>
    #include <stdio.h>
    #include <malloc.h>
    /* Text built for the most recently reduced stmt_root; returned by getsql(). */
    char * parsetreeroot=NULL;
    extern "C" int yylex();
    extern "C" int yyparse();
    extern "C" void yyerror(const char *s, ...);
    /* NUL-terminated statement text that readInputForLexer() hands to the scanner. */
    char globalInputText[10000];
    /* Number of bytes of globalInputText already handed to the scanner. */
    int globalReadOffset;
    int readInputForLexer( char *buffer, int *numBytesRead, int maxBytesToRead );
    /*
     * Concatenate two C strings into a newly allocated buffer.
     *
     * s1, s2: NUL-terminated strings; a NULL pointer is treated as "".
     * Returns the joined string, owned by the caller (release with free()),
     * or NULL when the allocation fails. Neither input is modified or freed.
     */
    char * mystrcat(const char *s1, const char *s2)
    {
        size_t len1;
        size_t len2;
        char *joined;

        if (s1 == NULL) {
            s1 = "";
        }
        if (s2 == NULL) {
            s2 = "";
        }
        len1 = strlen(s1);
        len2 = strlen(s2);

        /* the cast is needed because this file is also compiled as C++ */
        joined = (char *)malloc(len1 + len2 + 1);
        if (joined == NULL) {
            return NULL;
        }
        memcpy(joined, s1, len1);
        memcpy(joined + len1, s2, len2 + 1);   /* + 1 copies the terminator */
        return joined;
    }
    %}
    %locations
    /* Semantic value type. Every typed token and nonterminal declared below
       uses strval; subtok carries the operator code the scanner stores for
       COMPARISON and SHIFT. intval and floatval are not referenced by any
       declaration in this section. */
    %union {
    	int intval;		
    	double floatval;	
    	char *strval;
    	int subtok;
    }
    /* Tokens whose text is passed on by the scanner. */
    %token <strval> NAME
    %token <strval> STRING
    %token <strval> INTNUM
    %token <strval> BOOL
    %token <strval> APPROXNUM
    %token <strval> USERVAR
    
    /* Every nonterminal carries the text assembled for it so far. */
    %type <strval> stmt_root  create_stmt para_list definition  data_type pro_block pro_parameters declare_list set_list 
    %type <strval> assign_var  pro_body pro_stmt_list sql_stmt expr
    
    
    
    /* Operator precedence: bison gives later declarations higher precedence. */
    %right ASSIGN
    %left OR
    %left XOR
    %left ANDOP
    
    %left NOT '!'
    %left BETWEEN
    %left <subtok> COMPARISON /* = <> < > <= >= <=> */
    %left '|'
    %left '&'
    %left <subtok> SHIFT /* << >> */
    %left '+' '-'
    %left '*' '/' '%' MOD
    %left '^'
    
    /* Keyword tokens without a semantic value. */
    %token CREATE
    %token PROCEDURE
    %token PRONAME
    %token DECLARE
    %token SET
    %token BEGINT
    %token END
    %token SQL
    
    %token INT
    %token VARCHAR
    %token DATE
    %token TIME
    %token DOUBLE
    
    /* NOTE(review): NOT is also listed in the %left line above. */
    %token IF
    %token NOT
    %token EXISTS
    %token THEN
    %token ELSE
    %token ENDIF
    %token FOR
    %token WHEN
    %token WHILE
    %start stmt_root
    %%

    /* Whole input: the procedure header followed by its body block. The
       joined text is also published through parsetreeroot. */
    stmt_root
        : create_stmt pro_block
            {
                parsetreeroot = mystrcat($1, $2);
                $$ = parsetreeroot;
            }
        ;
    /*
     * CREATE PROCEDURE name ( parameters )
     * Yields "create procedure <name>(<parameters>)(create)" plus a newline.
     */
    create_stmt: CREATE PROCEDURE  NAME '(' para_list ')'
            {
                char *head = mystrcat("create procedure ", $3);
                char *open = mystrcat(head, "(");
                char *args = mystrcat(open, $5);
                $$ = mystrcat(args, ")(create)\n");
                /* release only the intermediates built in this action */
                free(head);
                free(open);
                free(args);
            }
    ;
    /*
    opt_if_not_exists:	      { $$ = 0; } 
       | IF NOT EXISTS            { $$ = 1; } 
       ;
    */
    /* One or more parameter definitions, joined with commas. */
    para_list: definition { $$=$1; }
    |definition ',' para_list
            {
                char *head = mystrcat($1, ",");
                $$ = mystrcat(head, $3);
                free(head);   /* intermediate string is no longer needed */
            }
    ;
    /* A user variable followed by its type: yields "<variable> <type>". */
    definition: USERVAR data_type
            {
                char *head = mystrcat($1, " ");
                $$ = mystrcat(head, $2);
                free(head);   /* intermediate string is no longer needed */
            }
    ;
    
    /*
     * Type names. Each value is a heap copy so that it is owned the same way
     * as the strings produced by the scanner and by mystrcat.
     * NOTE(review): the VARCHAR length is parsed but not included in the
     * result; confirm whether consumers need it.
     */
    data_type:
       DATE                         { $$ = strdup("date"); }
       | TIME                       { $$ = strdup("time"); }
       | VARCHAR '(' INTNUM ')'
            {
                $$ = strdup("varchar");
                free($3);   /* the length text is a scanner strdup copy that is not used */
            }
       | INT                        { $$ = strdup("int"); }
       | DOUBLE                     { $$ = strdup("double"); }
       ;
    
    /*
     * BEGIN declarations statements END
     * Yields "begin", a newline, the declaration text, the statement text
     * and finally "end".
     */
    pro_block: BEGINT pro_parameters pro_body END
            {
                char *head = mystrcat("begin\n", $2);
                char *body = mystrcat(head, $3);
                $$ = mystrcat(body, "end");
                /* release only the intermediates built in this action */
                free(head);
                free(body);
            }
    ;
    
    /*
     * Leading section of a block: one DECLARE list, then any mix of further
     * DECLARE and SET lists. Each list is closed with ";(declare)" or
     * ";(set)" and a newline.
     */
    pro_parameters: declare_list ';' { $$=mystrcat($1,";(declare)\n");}
    |pro_parameters  declare_list ';'
            {
                char *joined = mystrcat($1, $2);
                $$ = mystrcat(joined, ";(declare)\n");
                free(joined);   /* intermediate string is no longer needed */
            }
    |pro_parameters  set_list ';'
            {
                char *joined = mystrcat($1, $2);
                $$ = mystrcat(joined, ";(set)\n");
                free(joined);   /* intermediate string is no longer needed */
            }
    ;
    
    /*
     * "declare <definition>" followed by further comma separated definitions.
     * The empty alternative is kept so the accepted input does not change,
     * but it now yields an empty string instead of an undefined value.
     * NOTE(review): the empty alternative looks accidental and is a likely
     * source of parser conflicts; consider removing it.
     */
    declare_list: /* empty */
            {
                $$ = strdup("");
            }
    |DECLARE definition
            {
                $$=mystrcat("declare ",$2);
            }
    |declare_list ',' definition
            {
                char *head = mystrcat($1, ",");
                $$ = mystrcat(head, $3);
                free(head);   /* intermediate string is no longer needed */
            }
    ;
    
    /*
     * "set <assignment>" followed by further comma separated assignments.
     * The empty alternative is kept so the accepted input does not change,
     * but it now yields an empty string instead of an undefined value.
     * NOTE(review): the empty alternative looks accidental and is a likely
     * source of parser conflicts; consider removing it.
     */
    set_list: /* empty */
            {
                $$ = strdup("");
            }
    |SET assign_var
            {
                $$=mystrcat("set ",$2);
            }
    | set_list ',' assign_var
            {
                char *head = mystrcat($1, ",");
                $$ = mystrcat(head, $3);
                free(head);   /* intermediate string is no longer needed */
            }
    ;
    
    /*
     * variable = operand, yielding "<variable>=<operand>".
     * The scanner reports '=' as a COMPARISON token, so the operator code is
     * checked here and any other comparison operator is rejected.
     */
    assign_var : USERVAR COMPARISON expr
            {
                char *head;

                if ($2 != 4) {   /* 4 is the subtok the scanner stores for "=" */
                    yyerror("expected '=' in assignment");
                    YYERROR;
                }
                head = mystrcat($1, "=");
                $$ = mystrcat(head, $3);
                free(head);   /* intermediate string is no longer needed */
            }
    ;
    
    /* A single operand; the text supplied by the scanner is passed through. */
    expr
        : NAME
            { $$ = $1; }
        | STRING
            { $$ = $1; }
        | INTNUM
            { $$ = $1; }
        | APPROXNUM
            { $$ = $1; }
        | BOOL
            { $$ = $1; }
        ;
    
    /* Statement part of a block: the text of its statement list. */
    pro_body
        : pro_stmt_list
            { $$ = $1; }
        ;

    /* One or more statements; their texts are appended in input order. */
    pro_stmt_list
        : sql_stmt
            { $$ = $1; }
        | pro_stmt_list sql_stmt
            { $$ = mystrcat($1, $2); }
        ;
    /*
     * SQL name ';' yields "<name>;(sql)" plus a newline.
     * The empty alternative is kept so the accepted input does not change,
     * but it now yields an empty string instead of an undefined value.
     * NOTE(review): the empty alternative looks accidental and is a likely
     * source of parser conflicts; consider removing it.
     */
    sql_stmt: /* empty */
            {
                $$ = strdup("");
            }
    |SQL NAME ';' { $$=mystrcat($2,";(sql)\n");}
    ;
    %%
    /*
    int main(int argc, char* argv[])
    {
    	yyparse();
    }*/
    /*
     * Input hook used by the scanner's YY_INPUT override: copies the next
     * piece of globalInputText into the scanner's buffer.
     *
     * buffer:         destination supplied by the scanner
     * numBytesRead:   receives the number of bytes copied; 0 means no more input
     * maxBytesToRead: capacity of buffer
     * Always returns 0. Advances globalReadOffset by the number of bytes copied.
     */
    int readInputForLexer( char *buffer, int *numBytesRead, int maxBytesToRead ) {
        int bytesRemaining = (int)strlen(globalInputText) - globalReadOffset;
        int numBytesToRead = maxBytesToRead;

        /* A stale offset (text replaced by a shorter one without resetting the
           offset) must read as end of input, not as a negative count. */
        if ( bytesRemaining < 0 ) {
            bytesRemaining = 0;
        }
        if ( numBytesToRead > bytesRemaining ) {
            numBytesToRead = bytesRemaining;
        }
        if ( numBytesToRead < 0 ) {
            numBytesToRead = 0;
        }
        if ( numBytesToRead > 0 ) {
            memcpy(buffer, globalInputText + globalReadOffset, (size_t)numBytesToRead);
        }
        *numBytesRead = numBytesToRead;
        globalReadOffset += numBytesToRead;
        return 0;
    }
    /*
     * Report a scanner or parser error on stderr as "error: <message>".
     *
     * s is a printf-style format and the remaining arguments are its values;
     * the scanner calls this with formats such as "Unterminated string %s".
     * Callers passing arbitrary text must make sure it contains no '%'.
     */
    void yyerror(const char *s, ...)
    {
        va_list ap;

        va_start(ap, s);
        fputs("error: ", stderr);
        vfprintf(stderr, s, ap);
        fputc('\n', stderr);
        va_end(ap);
    }
    /*
     * Variant of the error reporter that prefixes the message with the
     * scanner's current line number: "<line>: error: <message>".
     *
     * s is a printf-style format and the remaining arguments are its values.
     */
    void zzerror(const char *s, ...)
    {
        extern int  yylineno;
        va_list ap;

        va_start(ap, s);
        fprintf(stderr, "%d: error: ", yylineno);
        vfprintf(stderr, s, ap);
        fprintf(stderr, "\n");
        va_end(ap);   /* every va_start needs a matching va_end */
    }
    
    /* End-of-input hook for the scanner: always reports that no further
       input follows. */
    int yywrap(void)
    {
        enum { NO_MORE_INPUT = 1 };

        return NO_MORE_INPUT;
    }
    /* Returns the text stored in parsetreeroot by the stmt_root rule, or NULL
       if nothing has been stored yet. The pointer is returned as is, not copied. */
    char* getsql()
    {
        char *result = parsetreeroot;

        return result;
    }
    
    
    


     

    这部分就是对上一步识别出来的词按顺序进行匹配,确定它们能否构成一条完整的语句

    这些需要在 Linux 环境下进行调试

    bison -d 文件名称

    flex 文件名称

  • 相关阅读:
    ubuntu 下python安装及hello world
    mongodb数据库学习【安装及简单增删改查】
    samba服务器共享开发【windows下开发linux网站】
    系统架构一:snmp+mrtg服务器监控
    记.gitignore的一次惊心动魄
    第一章 引论 第二章 算法分析
    渗透测试实践指南(1)
    day7
    day5 io模型
    day4(带)
  • 原文地址:https://www.cnblogs.com/mfrbuaa/p/4296114.html
Copyright © 2011-2022 走看看