zoukankan      html  css  js  c++  java
  • 编译原理:正规式转变成DFA算法

    //将正规式转变成NFA
    package hjzgg.formal_ceremony_to_dfa;

    import java.util.ArrayList;
    import java.util.Map;
    import java.util.Set;
    import java.util.TreeMap;
    import java.util.TreeSet;
    import java.util.Vector;
    class Edge{ public int u, v; public char key; public Edge(int u, int v, char key) { super(); this.u = u; this.v = v; this.key = key; } @Override public String toString() { return u + "->" + v + " " + key; } @Override public boolean equals(Object arg0) { Edge tmp = (Edge)arg0; return tmp.u==this.u && tmp.v==this.v && tmp.key==this.key; } @Override public int hashCode() { return u+v+key; } } class NFA{ public static final int MAX_NODE = 100; private boolean finalState[] = new boolean[MAX_NODE];//记录每一个节点是否为终态 private String formal_ceremony;//正规式字符串 private int cnt_node=1;//记录节点的个数 private Map<Integer, Integer> endNode = new TreeMap<Integer, Integer>();//每一个开始节点对应的终端节点 private ArrayList<Edge> nodeAl = new ArrayList<Edge>(); private Vector<Pair>[] g = new Vector[MAX_NODE];//NFA图 private Set<Character> st = new TreeSet<Character>();//正规式中出现的字符的集合 public NFA(String formal_ceremony) { super(); this.formal_ceremony = formal_ceremony; } private void addEdge(int u, int v, char ch){ nodeAl.add(new Edge(u, v, ch)); if(g[u] == null) g[u] = new Vector<Pair>(); g[u].add(new Pair(v, ch)); if(ch!='$') st.add(ch); } public boolean kernel_way(int fa, int ld, int rd, boolean isClosure){//fa表示区间的开始点,正规式的区间[ld, rd], isClosure表示这段区间查是否存在闭包 if(ld < 0 || rd >= formal_ceremony.length()){ System.out.println("正规式不正确---发生数组越界!"); return false; } int pre_node = fa; int inBracket = 0;//判断'|'是否在括弧内 for(int i=ld; i<=rd; ++i){ if(formal_ceremony.charAt(i)=='(') ++inBracket; else if(formal_ceremony.charAt(i)==')') --inBracket; else if(formal_ceremony.charAt(i)=='|' && 0==inBracket){ if(!kernel_way(fa, ld, i-1, isClosure)) return false; if(!kernel_way(fa, i+1, rd, isClosure)) return false; return true; } } for(int i=ld; i<=rd; ++i){ if(formal_ceremony.charAt(i)=='('){//又是一个子区间 //寻找和 该 '('相匹配的')' int cntLeftBracket = 0;//统计遍历过程中'('出现的次数,遇到')'减去1 int posRightBracket = -1;//记录相匹配的')'的位置 int posLeftBracket = i; for(int j=i+1; j<=rd; ++j){ if(formal_ceremony.charAt(j)=='(') ++cntLeftBracket; else 
if(formal_ceremony.charAt(j)==')'){ if(cntLeftBracket == 0){ posRightBracket = j; break; } --cntLeftBracket; } } if(posRightBracket == -1){//出错 System.out.println("正规式出错----括弧不匹配!"); return false; } int nodeFather = 0;//括弧内正则式的开始节点 if(posRightBracket+1 <= rd && formal_ceremony.charAt(posRightBracket+1)=='*'){ i = posRightBracket+1;//过滤掉"()*" addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 pre_node = cnt_node; nodeFather = cnt_node; addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 pre_node = cnt_node; //处理()*括弧内的正规式 if(!kernel_way(nodeFather, posLeftBracket+1, posRightBracket-1, true)) return false; } else { nodeFather = pre_node; if(!kernel_way(nodeFather, posLeftBracket+1, posRightBracket-1, false))//对于"(101)", 看成101 return false; i = posRightBracket; } } else {//单个字符 if(formal_ceremony.charAt(i)==')') continue; if(i+1 <= rd && formal_ceremony.charAt(i+1)=='*'){ addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 pre_node = cnt_node; addEdge(pre_node, pre_node, formal_ceremony.charAt(i)); if(i+1==rd && isClosure) addEdge(pre_node, fa, '$');//表示这一条边为空并且是连接到父亲节点 else{ if(endNode.containsKey(fa)) addEdge(pre_node, endNode.get(fa), '$'); else{ addEdge(pre_node, ++cnt_node, '$');//表示这一条边为空 if(i==rd) endNode.put(fa, cnt_node);//记录非闭包状态下 第一个节点对应的最后一个节点 } } pre_node = cnt_node; ++i;//过滤* } else { if(i==rd && isClosure){//是闭包的情况 addEdge(pre_node, fa, formal_ceremony.charAt(i)); } else{ if(endNode.containsKey(fa)) addEdge(pre_node, endNode.get(fa), formal_ceremony.charAt(i)); else{ addEdge(pre_node, ++cnt_node, formal_ceremony.charAt(i)); if(i==rd) endNode.put(fa, cnt_node);//记录非闭包状态下 第一个节点对应的最后一个节点 } } pre_node = cnt_node; } } } return true; } private void checkFinalState(){//检查哪一个节点是终态 for(int i=1; i<=cnt_node; ++i){ int cc = 0; if(g[i] == null){//表明是终态 finalState[i] = true; continue; } for(int j=0; j<g[i].size(); ++j) if(g[i].elementAt(j).v != i) ++cc; if(cc == 0)//表明是终态 finalState[i] = true; } } public boolean[] getFinalState(){ return finalState; } public Vector<Pair>[] 
getNFAGraphics(){ if(kernel_way(1, 0, formal_ceremony.length()-1, false)){ // for(Edge e : nodeAl)//打印NFA // System.out.println(e); checkFinalState(); return g; } return null; } public Set<Character> getCharacterSet(){ return st; } public void outputNFA(){ if(kernel_way(1, 0, formal_ceremony.length()-1, false)){ checkFinalState(); for(Edge e : nodeAl) System.out.println(e); } } } /* * 将正规式转换成NFA * */ public class ToNFA { public static void main(String[] args){ String formal_ceremony = "0*(100*)*0*"; // String formal_ceremony = "1(1010*|1(010)*1)*0"; // String formal_ceremony = "1(0|1)*101"; // String formal_ceremony = "0*1*(010)0*1*"; // String formal_ceremony = "(0|1|2)*"; // String formal_ceremony = "0|1"; // String formal_ceremony = "0|1|2|3"; // String formal_ceremony = "(0|1|6)|(2|3)|(4|5)"; // String formal_ceremony = "(0|1)*|(2|3)*"; // String formal_ceremony = "((10)|(01)*|(0|1))"; NFA nfa = new NFA(formal_ceremony); nfa.outputNFA(); } }
    //将NFA转变成确定化DFA
    package hjzgg.formal_ceremony_to_dfa; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedList; import java.util.Queue; import java.util.Set; import java.util.Vector;
    class Pair { public int v; public char ch; public Pair(int v, char ch) { super(); this.v = v; this.ch = ch; } } class MyHashSet extends HashSet<Integer>{//重写 set 集合的 hashcode()和equals()方法 private int state; public void setState(int state){ this.state = state; } public int getState(){ return state; } @Override public boolean equals(Object arg0) { MyHashSet tmp = (MyHashSet)arg0; if(tmp.size() != this.size()) return false; Iterator<Integer> it = this.iterator(); while(it.hasNext()){ if(!tmp.contains(it.next())) return false; } return true; } @Override public int hashCode() { int sum = 0; Iterator<Integer> it = this.iterator(); while(it.hasNext()) sum += (((java.lang.Integer)it.next()).intValue()); return sum; } } class DefinedNFA{ private int dfaNode = 0;//defined DFA节点的个数 private boolean[] finalState = null;//表示NFA中哪一个节点是终态 private boolean[] newFinalState = new boolean[NFA.MAX_NODE] ; private Vector<Pair>[] g = null;//NFA 图 private Set<Edge>edgeSet = new HashSet<Edge>(); //标记图中的边是否被访问过 private MyHashSet st = null; //集合,表示每一个子集状态 private Queue<MyHashSet> queue = new LinkedList<MyHashSet>();//存放要执行的子集状态 private Set<MyHashSet> sst = new HashSet<MyHashSet>(); private Set<Character> characterSet = null;//正规式中的字符的集合 private ArrayList<Edge> nodeAl = new ArrayList<Edge>();//NFA边的集合 public DefinedNFA(Vector<Pair>[] g, Set<Character> characterSet, boolean[] finalState) { super(); this.g = g; this.characterSet = characterSet; this.finalState = finalState; } public Set<Character> getCharacterSet(){ return characterSet; } public int getDfaNode(){ return dfaNode; } public boolean[] getNewFinalState(){ return newFinalState; } public ArrayList<Edge> getNodeAl(){ return nodeAl; } private void dfs(int u, char ch){ if(g[u]==null) return ; int len = g[u].size(); for(int i=0; i<len; ++i){ Pair pair = g[u].elementAt(i); Edge edge = new Edge(u, pair.v, pair.ch); if(!edgeSet.contains(edge) && pair.ch==ch){ edgeSet.add(edge); st.add(pair.v); dfs(pair.v, '$'); } } } public void 
checkIsFinalState(Set<Integer> st, int state){ Iterator<Integer> it = st.iterator(); while(it.hasNext()){ int val = it.next(); if(finalState[val]) newFinalState[state] = true; } } private void initFirstSet(){ edgeSet.clear(); st = new MyHashSet(); st.add(1); st.setState(++dfaNode); dfs(1, '$'); checkIsFinalState(st, dfaNode); sst.add(st); queue.add(st); } private void addEdge(int u, int v, char ch){ nodeAl.add(new Edge(u, v, ch)); } public void ToStateMatrix(){ initFirstSet(); while(!queue.isEmpty()){ MyHashSet myset = queue.poll(); for(Character ch : characterSet){ st = new MyHashSet(); for(Integer i : myset){ edgeSet.clear(); dfs(i, ch); } if(st.size()>0){ if(!sst.contains(st)){ sst.add(st); queue.add(st); st.setState(++dfaNode); checkIsFinalState(st, dfaNode); } else { Iterator<MyHashSet> it = sst.iterator(); while(it.hasNext()){ MyHashSet tmp = it.next(); if(tmp.equals(st)){ st = tmp; break; } } } addEdge(myset.getState(), st.getState(), ch); } } } } public void outputDFA(){ ToStateMatrix();//有状态转换矩阵得到defined NFA for(Edge e : nodeAl) System.out.println(e); } } public class ToDefinedDFA { public static void main(String[] args) { // String formal_ceremony = "((10)|(01)*|(0|1))"; // String formal_ceremony = "(0|1|6)|(2|3)|(4|5)"; // String formal_ceremony = "1(0|1)*101"; String formal_ceremony = "0*(100*)*0*"; NFA nfa = new NFA(formal_ceremony); DefinedNFA definedDFA = new DefinedNFA(nfa.getNFAGraphics(), nfa.getCharacterSet(), nfa.getFinalState()); definedDFA.outputDFA(); } }
    //将确定化DFA最小化
    package hjzgg.formal_ceremony_to_dfa; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Map.Entry; import java.util.Set;
    class MinimumDFA{ private boolean[] newFinalState = null;//由确定化DFA得到 private ArrayList<Edge> nodeAl = null;//由确定化DFA得到 private int dfaNode;//确定化DFA节点的个数 private Set<Character> characterSet = null;//正规式中的字符的集合 private ArrayList<Set<Integer>> setList = new ArrayList<Set<Integer>>(); public MinimumDFA(boolean[] newFinalState, ArrayList<Edge> nodeAl, int dfaNode, Set<Character> characterSet) { super(); this.newFinalState = newFinalState; this.nodeAl = nodeAl; this.dfaNode = dfaNode; this.characterSet = characterSet; } private void init(){//利用分割法将集合分成终态和非终态 Set<Integer> finalStateSet = new HashSet<Integer>(); Set<Integer> NofinalStateSet = new HashSet<Integer>(); for(int i=1; i<=dfaNode; ++i) if(newFinalState[i])//终态 finalStateSet.add(i); else NofinalStateSet.add(i); setList.add(finalStateSet); setList.add(NofinalStateSet); } public void toMinimumDfa(){ init(); boolean flag = true; ArrayList<Set<Integer>> tmpSetList = new ArrayList<Set<Integer>>(); while(flag){ flag = false; hjzgg: for(int k=0; k<setList.size(); ++k){ Set<Integer> st = setList.get(k); if(st.size()<=1) continue; for(Character ch : characterSet){ Map<Integer, Integer> mp = new HashMap<Integer, Integer>(); for(int i=0; i<nodeAl.size(); ++i){//st集合(也就是map的val值)在 ch这个点对应的集合 {st}a = {...} Edge edge = nodeAl.get(i); if(edge.key == ch && st.contains(edge.u)) mp.put(edge.u, edge.v); }

                for(Integer i : st)
                  if(!mp.containsKey(i))//表明i节点对应的是一条空边
                    mp.put(i, -1);

    //将st集合拆分成两个不想交的集合
                        Set<Integer> firstSet = new HashSet<Integer>();
                        Set<Integer> secondSet = new HashSet<Integer>();
                        for(int j=0; j<setList.size(); ++j){
                            firstSet.clear();
                            secondSet.clear();
                            Set<Integer> tmpSt = setList.get(k);
                            for(Entry<Integer, Integer> entry : mp.entrySet()){//返回此映射中包含的映射关系的 set 视图。返回的 set 中的每个元素都是一个 Map.Entry
                                  if(tmpSt.contains(entry.getValue()))
                                      firstSet.add(entry.getKey());
                                  else secondSet.add(entry.getKey());
                            }
                            if(firstSet.size()!=0 && secondSet.size()!=0){
                                flag = true;//如果发现可以拆分的集合,则继续最顶层的while循环
                                for(Integer i : tmpSt){//将firstSet 和 secondSet中都没有的元素添加到firstSet中
                                    if(!firstSet.contains(i) && !secondSet.contains(i))
                                        firstSet.add(i);
                                }
                                setList.remove(k);
                                setList.add(firstSet);
                                setList.add(secondSet);
                                break hjzgg;
                            }
                        }
                    }
                }
            }
    //        for(int k=0; k<setList.size(); ++k)//输出最终的集合划分
    //            System.out.println(setList.get(k));
    //        System.out.println("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&");
            for(int k=0; k<setList.size(); ++k){
                Set<Integer> st = setList.get(k);
                if(st.size() > 1){//看成是一个等价的状态,选择第一个元素当作代表
                    int first=0;
                    for(Integer i : st){//取得第一个元素
                        first = i;
                        break;
                    }
                    ArrayList<Edge> tmpList = new ArrayList<Edge>();
                    for(int i=0; i<nodeAl.size(); ++i){//遍历所有的边,找到不是first
                        Edge edge = nodeAl.get(i);
                        if(st.contains(edge.u) && edge.u!=first){
                            nodeAl.remove(i);
                            --i;
                        } else if(st.contains(edge.v) && edge.v!=first){
                            nodeAl.remove(i);
                            --i;
                            tmpList.add(new Edge(edge.u, first, edge.key));
                        }
                    }
                    nodeAl.addAll(tmpList);
                }
            }
        }
        
        public void outputMinimumDFA(){
    //        for(int i=0; i<nodeAl.size(); ++i)//输出未确定化的DFA
    //            System.out.println(nodeAl.get(i));
            toMinimumDfa();
            for(int i=0; i<nodeAl.size(); ++i)
                System.out.println(nodeAl.get(i));
        }
    }
    
    /**
     * Demo driver: regular expression -> NFA -> DFA -> minimised DFA, printing
     * the edges of the minimised automaton.
     */
    public class ToMinimumDFA {

        /** Expression used by the demo; the original article also tried "1(0|1)*101". */
        private static final String SAMPLE_EXPRESSION = "0*(100*)*0*";

        public static void main(String[] args) {
            DefinedNFA dfa = determinize(new NFA(SAMPLE_EXPRESSION));

            boolean[] finalStates = dfa.getNewFinalState();
            ArrayList<Edge> edges = dfa.getNodeAl();
            int stateCount = dfa.getDfaNode();
            Set<Character> alphabet = dfa.getCharacterSet();

            new MinimumDFA(finalStates, edges, stateCount, alphabet).outputMinimumDFA();
        }

        /** Wraps the NFA in a subset-construction object and runs the construction. */
        private static DefinedNFA determinize(NFA nfa) {
            DefinedNFA dfa = new DefinedNFA(nfa.getNFAGraphics(), nfa.getCharacterSet(), nfa.getFinalState());
            dfa.ToStateMatrix();
            return dfa;
        }

    }
  • 相关阅读:
    ZOJ Problem Set–2417 Lowest Bit
    ZOJ Problem Set–1402 Magnificent Meatballs
    ZOJ Problem Set–1292 Integer Inquiry
    ZOJ Problem Set–1109 Language of FatMouse
    ZOJ Problem Set–1295 Reverse Text
    ZOJ Problem Set–1712 Skew Binary
    ZOJ Problem Set–1151 Word Reversal
    ZOJ Problem Set–1494 Climbing Worm
    ZOJ Problem Set–1251 Box of Bricks
    ZOJ Problem Set–1205 Martian Addition
  • 原文地址:https://www.cnblogs.com/hujunzheng/p/4421132.html
Copyright © 2011-2022 走看看