zoukankan      html  css  js  c++  java
  • LR(1) 语法分析 (Python)

    内容

    • 生成 LR(1) 规范项集族
    • 构造 LR(1) 分析表
    • 自动机和分析表可视化
    • 基于栈的自顶向上分析器
    • 二义性文法处理(本程序中遇冲突直接覆写,可自由调整)
    # 项:(prod_id, dot_pos, ahead)
    import copy
    
    
    def closure(prods, ps, first):
        ps = ps[:]
        for (prod_id, dot_pos, ahead) in ps:
            prod = prods[prod_id]
            if dot_pos + 1 < len(prod):
                cur = prod[dot_pos+1]
                # next_symbol = prod[dot_pos+2] if dot_pos+2 < len(prod) else ahead
                nss = []
                tmpseq = prod[dot_pos+2:] + [ahead]
                i = 0
                first['$'] = ['$']
                while i < len(tmpseq):
                    for x in first[tmpseq[i]]:
                        if (x != '$' or tmpseq[i] == '$') and x not in nss:
                            nss.append(x)
                    if '$' not in first[tmpseq[i]]:
                        break
                    i += 1
                for next_symbol in nss:
                    for ex_prod_id, ex_prod in enumerate(prods):
                        if ex_prod[0] == cur:
                            new_item = (ex_prod_id, 0, next_symbol)
                            if new_item not in ps:
                                ps.append(new_item)
        return ps
    
    
    def goto(prods, ps, a, first):
        ans = []
        for (i, j, ah) in ps:
            prod = prods[i]
            if j + 1 < len(prod):
                cur = prod[j + 1]
                if cur == a:
                    new_item = (i, j+1, ah)
                    if new_item not in ans:
                        ans.append(new_item)
        return closure(prods, ans, first)
    
    
    def prod2str(prod, dotpos):
        prod = prod[:]
        prod.append("")
        prod[dotpos + 1] = '·' + prod[dotpos + 1]
        return prod[0] + '→' + ''.join(prod[1:])
    
    
    def make_lr1(prods):
    
        table = [{}]
        words = list(set(j for i in prods for j in i))
        nonterminals = list(set(i[0] for i in prods))
        terminals = [i for i in words if i not in nonterminals]
        terminals .append('$')
        nonterminals.sort()
        terminals.sort()
    
        # 预处理 first 集合
        first = {}
        for i in terminals:
            first[i] = [i]
        for i in nonterminals:
            first[i] = []
    
        while True:
            old_first = copy.deepcopy(first)
            for prod in prods:
                left = prod[0]
                right = prod[1:]
                i = 0
                while i < len(right):
                    for x in first[right[i]]:
                        if x != "$" and x not in first[left]:
                            first[left].append(x)
                    if "$" not in first[right[i]]:
                        break
                    i += 1
                if i == len(right):
                    if "$" not in first[left]:
                        first[left].append("$")
            if old_first == first:
                break
    
        I0 = closure(prods, [(0, 0, '$')], first)
        cc = [I0]
        i = 0
    
        while i < len(cc):
            ccc = {}
            for j in cc[i]:
                if (j[0], j[1]) not in ccc.keys():
                    ccc[(j[0], j[1])] = j[2]
                else:
                    ccc[(j[0], j[1])] = j[2]
            ccc = [[i[0], i[1], j] for i, j in ccc.items()]
            print("\033[35m[I%d]:\n  \033[31m" % i, '\n   \033[31m'.join(prod2str(prods[j[0]], j[1])+' \t\033[33m'+j[2]+'\033[30m'
                  for j in ccc), end="\n   \033[32m")
            candiwords = []
            for j in cc[i]:
                prod = prods[j[0]]
                dot_pos = j[1]+1
                if dot_pos < len(prod) and prod[dot_pos] not in candiwords:
                    candiwords.append(prod[dot_pos])
                if dot_pos == len(prod):
                    if prod[0] != "S'":
                        if j[2] not in table[i].keys():
                            table[i][j[2]] = 'r%d' % j[0]
                        else:
                            table[i][j[2]] = 'r%d' % j[0]
                    else:
                        if '$' not in table[i].keys():
                            table[i]['$'] = 'acc'
                        else:
                            table[i]['$'] += 'acc'
            for j in candiwords:
                tmp = goto(prods, cc[i], j, first)
                if len(tmp) > 0 and tmp not in cc:
                    cc.append(tmp)
                    table.append({})
                if len(tmp) > 0:
                    print(j, cc.index(tmp), sep=",", end="  ")
                    if j in terminals:
                        if j not in table[i].keys():
                            table[i][j] = "s%d" % cc.index(tmp)
                        else:
                            table[i][j] = "s%d" % cc.index(tmp)
                    else:
                        table[i][j] = "%d" % cc.index(tmp)
            i += 1
            print("\033[0m")
        head = ['I']
        for j in terminals:
            head.append(j)
        for j in nonterminals:
            if j == "S'":
                continue
            head.append(j)
        print('---'.join(''+'-'*(5) for i in head))
        print(' | '.join(i+' '*(5-len(i)) for i in head))
        print('-+-'.join(''+'-'*(5) for i in head))
        for i, line in enumerate(table):
            lo = [str(i)]
            for j in terminals:
                if j in line.keys():
                    lo.append(table[i][j])
                else:
                    lo.append("")
            for j in nonterminals:
                if j == "S'":
                    continue
                if j in line.keys():
                    lo.append(table[i][j])
                else:
                    lo.append("")
            print(' | '.join(i+' '*(5-len(i)) for i in lo))
        return table
    
    
    prod_file = open("prod.txt", "r")
    prod_lines = prod_file.readlines()
    prod_lines = [i[:-1] for i in prod_lines]
    prod_lines = [i[:-1] if i[-1] == ' ' else i for i in prod_lines]
    
    prods = [i.split(" ")[:1] + i.split(" ")[2:] for i in prod_lines]
    print(prods)
    table = make_lr1(prods)
    
    tokens = []
    token_file = open("tokens.txt", "r")
    token_lines = token_file.readlines()
    token_lines = [i[:-1] for i in token_lines]
    token_lines = [i[:-1] if i[-1] == ' ' else i for i in token_lines]
    token_lines = [i.split(' ') for i in token_lines]
    tokens = [i[1] for i in token_lines]
    tokens_str = [i[2] for i in token_lines]
    tokens.append("$")
    tokens_str.append("")
    
    
    stack_token = ['#']
    stack_state = [0]
    
    input_ptr = 0
    
    while len(stack_token) > 0:
        cur_token = tokens[input_ptr]
        cur_state = stack_state[-1]
        if cur_token not in table[cur_state].keys():
            print("errorA")
            break
        action = table[cur_state][cur_token]
        if action == "acc":
            print("succeed")
            break
        elif action[0] == 's':
            next_state = int(action[1:])
            stack_token.append(cur_token)
            stack_state.append(next_state)
            input_ptr += 1
            print("shift", cur_token)
        else:
            prod_id = int(action[1:])
            prod_len = len(prods[prod_id]) - 1
            print("reduce", prods[prod_id])
            stack_token = stack_token[:-prod_len]
            stack_state = stack_state[:-prod_len]
            cur_state = stack_state[-1]
            goto_nt = prods[prod_id][0]
            if goto_nt not in table[cur_state].keys():
                print("errorB")
                break
            next_state = int(table[cur_state][goto_nt])
            stack_token.append(goto_nt)
            stack_state.append(next_state)
    
    

    文法示例

    S' -> L 
    L -> L S 
    L -> S 
    S -> S0 ; 
    S -> { L }
    S0 -> Sa 
    Sa -> Id = E 
    S -> IfC S
    S -> IfCEl S
    IfCEl -> IfC S else
    IfC -> if ( C )
    C -> C1 
    C -> Ca C1 
    Ca -> C && 
    C1 -> C2
    C1 -> Co C2
    Co -> C1 ||  
    C2 -> C3
    C2 -> ! C3
    C3 -> ( C )
    C -> E Rop E 
    Rop -> ==
    Rop -> !=
    Rop -> <
    Rop -> <=
    Rop -> >
    Rop -> >=
    E -> E + Et
    E -> E - Et
    E -> Et
    Et -> Et * Ef
    Et -> Et / Ef
    Et -> Et % Ef
    Et -> Ef 
    Ef -> ( E )
    Ef -> Id 
    E -> Const
    

    项集族输出示例

    image

    分析表输出示例

    image

    分析输入示例

    if (1 == 2) if (3 == 4) a = 1; else a = 2; else a = 3;
    

    分析输出示例

    image

  • 相关阅读:
    详解 字符转换流
    详解 编码和解码
    详解 字符流
    详解 序列输入流
    详解 数据输入输出流
    八皇后
    这次我们从底层把线程说清楚
    四数之和递归
    PCB 内网实现《OCR文字识别》实现逻辑
    PCB 线路板人生
  • 原文地址:https://www.cnblogs.com/mollnn/p/15785388.html
Copyright © 2011-2022 走看看