zoukankan      html  css  js  c++  java
  • egrep命令的实现 分类: 编译原理 2014-06-01 23:41 329人阅读 评论(0) 收藏

    本程序实现了egrep命令,首先将正则表达式转换为NFA,并实现模拟NFA的算法。

    本程序使用flex实现词法分析,bison实现语法分析

    若给定的一行字符串中存在一个子串能被该NFA接受,则输出整行。
    所用语法如下:
    S-->S|S
       |SS
       |S*
       |(S)

       |a


    bison程序:

    %{
    #include <string>
    #include <iostream>
    #include <fstream>
    #include <sstream>
    #include <vector>
    #include <stack>
    #include <algorithm>
    #include <boost/foreach.hpp>
    #include <set>
    #include <map>
    using namespace std;
    // A state is identified by a string; the grammar actions build these
    // names from the running counter `number`.
    typedef string state;
    // An unordered collection of NFA states without duplicates.
    typedef set<string> states;
    // Outgoing edges of one state: edge label -> set of target states.
    // The label is an input symbol or the literal string "epsilon".
    typedef map<string, states> transition;
    // Semantic value carried by every grammar symbol (see api.value.type).
    // All members are raw pointers to objects allocated with `new` by the
    // lexer or by the grammar actions; nothing in this file frees them.
    class value
    { 
      public:
      // Literal character of an ASCII token; filled in by the lexer.
      string* symbol;
      // Name of the start state of the NFA fragment.
      state *start;
      // Name of the accepting state of the NFA fragment.
      state *final;
      // Transition table of the fragment: state -> (label -> target states).
      map<state,transition> *transitions;
    };
    // Path of the text file to search; set from argv[1] in main().
    char* filename;
    int number;// used to generate state numbers
    // Lexer entry point (defined by the flex-generated lex.yy.c included below).
    int yylex(void);
    // Parser error callback.
    void yyerror(char const *);
    // Converts a value to its textual form through a stringstream.
    template<class TYPE>
    string transfer(TYPE init);
    // NFA constructors, one per grammar production; each writes into `result`.
    void Scopy(value S,value &result);
    void Skleen(value S,value &result);
    void SconnectS(value S1,value S2,value &result);
    void SorS(value S1,value S2,value &result);
    // NFA simulation helpers.
    states epsilonClosure(states T,map<state,transition> transitions);
    states move(states T, string a,map<state,transition> transitions);
    // Prints `str` when the NFA accepts some substring of it.
    void simulateNFA(string str,value NFA); 
    %}
    
    // Every semantic value ($$, $1, yylval, ...) is an instance of class value.
    %define api.value.type { class value }
    // Precedence: closure (*) > concatenation > alternation (|)
    %token ASCII
    %token LP
    %token RP
    %left OR
    %left CONNECT
    %left KLEEN
    // Tell bison that 4 shift/reduce conflicts are expected in this grammar.
    %expect 4
    %%
    /* lines: a sequence of newline-terminated regular expressions.
       Each complete expression S is turned into an NFA by the S rules; the
       action below then scans the input file and prints every matching line. */
    lines: lines S '\n'
           {
             // The NFA for this expression is complete; restart state
             // numbering for the next expression.
             number=0;
             ifstream in;
             if(filename!=NULL)
               in.open(filename);
             if(!in.is_open())
             {
               // Report the problem instead of silently matching nothing.
               cerr<<"cannot open input file"<<endl;
             }
             else
             {
               string line;
               while(getline(in,line))
                 simulateNFA(line,$2);
             }
           }
           | lines '\n'      /* blank line: nothing to do */
           |                 /* empty input */
           | error '\n' {yyerrok;}   /* skip to end of line after a syntax error */
           ;
    /* S: a regular expression. Every alternative leaves in $$ an NFA fragment
       (start state, final state, transition table) for the text it matched. */
    S:   S KLEEN   
         {Skleen($1,$$);}
        |S S %prec CONNECT   
         {SconnectS($1,$2,$$);}
        |S OR S    
         {SorS($1,$3,$$);} 
        |LP S RP   
         {Scopy($2,$$);} 
        |ASCII 
        {
          // Single symbol: two fresh states joined by one edge labelled with
          // the character. The symbol is read from $1 explicitly rather than
          // relying on $$ having been pre-initialised with $1.
          $$.symbol=$1.symbol;
          $$.start=new string(transfer<int>(number++));
          $$.final=new string(transfer<int>(number++));
          states accepting;
          accepting.insert(*$$.final);
          $$.transitions=new map<state,transition>();
          (*$$.transitions)[*$$.start][*$1.symbol]=accepting;
        }
        ;
    %%
    #include "lex.yy.c"
    
    // Program entry point: argv[1] names the text file to search; regular
    // expressions are then read by the parser, one per line.
    // Returns 1 when the file argument is missing, otherwise yyparse()'s result.
    int main(int argc,char*argv[]) {
        if(argc<2)
        {
            // Without this check argv[1] would be a null pointer.
            cerr<<"usage: "<<(argc>0?argv[0]:"a.out")<<" filename"<<endl;
            return 1;
        }
        number=0;
        filename=argv[1];
        return yyparse();
    }
    
    // Parser error callback: writes the message and a newline to standard
    // output and flushes it.
    void yyerror(char const *s)
    {
       cout << s;
       cout << endl;
    }
    // Returns T extended with every state reachable from a member of T
    // through edges labelled "epsilon" only.
    // `transitions` is a by-value copy, so the empty entries that operator[]
    // may create while probing it never reach the caller's table.
    states epsilonClosure(states T,map<state,transition> transitions)
    {
        // Worklist of states whose epsilon edges still have to be followed.
        stack<state> pending;
        for (states::const_iterator it = T.begin(); it != T.end(); ++it)
            pending.push(*it);

        while (!pending.empty())
        {
            const state current = pending.top();
            pending.pop();

            const states &reachable = transitions[current]["epsilon"];
            for (states::const_iterator it = reachable.begin(); it != reachable.end(); ++it)
            {
                // insert() reports whether the state was new to T; only new
                // states need to be expanded.
                if (T.insert(*it).second)
                    pending.push(*it);
            }
        }
        return T;
    }
        
    // Returns the set of states reachable from any state of T by exactly one
    // edge labelled `a`. `transitions` is a by-value copy, so probing it with
    // operator[] does not modify the caller's table.
    states move(states T,string a,map<state,transition> transitions)
    {
        states reached;
        for (states::const_iterator src = T.begin(); src != T.end(); ++src)
        {
            const states &targets = transitions[*src][a];
            reached.insert(targets.begin(), targets.end());
        }
        return reached;
    }
    
    void simulateNFA(string str,value NFA)
    {
     bool flag=false;
    //穷举字串测试,若被NFA受则退出循环
     for(int i=0;i<str.length();i++)
      {
       for(int j=0;j<=str.length()-i;j++)
        {
         string substr;
         substr=str.substr(i,j);
         states S;
         string c;//转移符号
         int count=0;
         if(j==0)
         {c="epsilon";}
         else
         {c=transfer<char>(substr[0]);}
         S.insert(*NFA.start);
         S=epsilonClosure(S,*NFA.transitions); 
         while(count<substr.length())
         {
          S=epsilonClosure(move(S,c,*NFA.transitions),*NFA.transitions);
          c=substr[++count];
         }
         if (S.count(*NFA.final)!= 0) 
         {
           flag=true;
           break;
         }
        }
        if(flag)
        break;
      }
      if(flag)
      cout<<str<<endl;
    }
    
    // Converts any stream-insertable value to its textual representation.
    // The whole buffer is returned, so whitespace is preserved: extracting
    // with operator>> would skip leading blanks and stop at the first one,
    // turning ' ' into an empty string.
    template<class TYPE>
    string transfer(TYPE init)
    {
      stringstream ss;
      ss<<init;
      return ss.str();
    }
    /* result --> ( S )
       Parentheses add nothing to the automaton: result receives its own
       copies of S's start state, final state and transition table. */
    void Scopy(value S,value &result)
    { 
      const map<state,transition> &source=*S.transitions;
      result.transitions=new map<state,transition>(source);
      result.final=new string(*S.final);
      result.start=new string(*S.start);
    }
    /* result --> S *
       Copies S's transition table into result and gives result a new start
       state and a new final state. Epsilon edges are then added
         - from result's start to S's start and to result's final, and
         - from S's final  to S's start and to result's final. */
    void Skleen(value S,value &result)
    {
       // The start state takes the first fresh number, the final state the next.
       const string newStart=transfer<int>(number++);
       const string newFinal=transfer<int>(number++);
       result.start=new string(newStart);
       result.final=new string(newFinal);

       result.transitions=new map<state,transition>(*S.transitions);
       map<state,transition> &table=*result.transitions;

       // Both new epsilon fan-outs lead to the same pair of states.
       states targets;
       targets.insert(*S.start);
       targets.insert(newFinal);
       table[newStart]["epsilon"]=targets;
       table[*S.final]["epsilon"]=targets;
    }
    /* result --> S1 S2
       Copies the transition tables of S1 and S2 into result, then renames
       S2's start state to S1's final state so that the two fragments are
       glued together. result starts at S1's start and ends at S2's final. */
    void SconnectS(value S1,value S2,value &result)
    {
      result.start=new string(*S1.start);
      result.final=new string(*S2.final);

      // Start from S1's table and add S2's entries; keys already present
      // are left untouched by insert().
      result.transitions=new map<state,transition>(*S1.transitions);
      map<state,transition> &table=*result.transitions;
      table.insert(S2.transitions->begin(),S2.transitions->end());

      // Move the edges leaving S2's start so that they leave S1's final.
      const map<state,transition>::iterator pos=table.find(*S2.start);
      if(pos!=table.end())
      {
         const transition outgoing=pos->second;
         table.erase(pos);
         table.insert(make_pair(*S1.final,outgoing));
      }
    }
    /* result --> S1 | S2
       Copies the transition tables of S1 and S2 into result and gives result
       a new start state and a new final state. Epsilon edges are then added
         - from result's start to the start states of S1 and S2, and
         - from the final states of S1 and S2 to result's final. */
    void SorS(value S1,value S2,value &result)
    {
      // The start state takes the first fresh number, the final state the next.
      const string newStart=transfer<int>(number++);
      const string newFinal=transfer<int>(number++);
      result.start=new string(newStart);
      result.final=new string(newFinal);

      // Union of both tables; insert() keeps an entry that already exists.
      result.transitions=new map<state,transition>(*S1.transitions);
      map<state,transition> &table=*result.transitions;
      table.insert(S2.transitions->begin(),S2.transitions->end());

      states branchStarts;
      branchStarts.insert(*S1.start);
      branchStarts.insert(*S2.start);
      table[newStart]["epsilon"]=branchStarts;

      states joinTarget;
      joinTarget.insert(newFinal);
      table[*S1.final]["epsilon"]=joinTarget;
      table[*S2.final]["epsilon"]=joinTarget;
    }
    

    flex程序:

    %{
    #include<string>
    %}
    /* An escaped metacharacter: a backslash followed by one of ( ) | *.
       The backslash is doubled because flex still interprets backslash
       escapes inside a quoted string, so "\(" would match a bare "(". */
    escape "\\("|"\\)"|"\\|"|"\\*"
    %%
    [ \t]+   { /* blanks and tabs between tokens are ignored */ }
    \n       { /* end of the expression: hand the newline to the parser */ return yytext[0];}
    "("      {return LP;}
    ")"      {return RP;}
    "|"      {return OR;}
    "*"      {return KLEEN;}
    {escape} {
              /* drop the backslash; the metacharacter becomes an ordinary symbol */
              yylval.symbol=new string(transfer(yytext).substr(1,1));
              return ASCII;
             }
    .        {
              /* any other single character stands for itself */
              yylval.symbol=new string(transfer(yytext));
              return ASCII;
             }
    假设将bison程序拷至bison.y,flex程序拷至lex.l。

    运行如下:
    flex lex.l
    bison bison.y
    g++ bison.tab.c -ly -ll
    ./a.out filename
    最后可输入待测试正则表达式


    版权声明:本文为博主原创文章,未经博主允许不得转载。

  • 相关阅读:
    有关Python,网络,机器学习,深度学习
    Python map使用
    左旋转字符串,翻转字符串
    使用Python创建二叉树,作为调试程序使用
    batchnorm2d函数理解,numpy数据归一化
    和为s的连续正数序列,和为s的两个数字
    判断是否为平衡二叉树
    原生js格式化json的方法
    ace editor 使用教程
    Vue+webpack+echarts+jQuery=demo
  • 原文地址:https://www.cnblogs.com/luo-peng/p/4646263.html
Copyright © 2011-2022 走看看