  • 9 Building Feature-Based Grammars

    1. Grammatical Features

    In this chapter we explore the role that features play in rule-based grammars. In contrast to feature extraction, which records features that have already been detected automatically, here we declare the features of words and phrases explicitly.

    Feature structures bundle together various kinds of information about grammatical entities, for example:

    CAT (grammatical category)   ORTH (orthography)   REF (referent)   REL (relation)

    kim = {'CAT': 'NP', 'ORTH': 'Kim', 'REF': 'k'}
    chase = {'CAT': 'V', 'ORTH': 'chased', 'REL': 'chase'}
    lee = {'CAT': 'NP', 'ORTH': 'Lee', 'REF': 'l'}
    
    # AGT (the agent role)   PAT (the patient role)
    chase['AGT'] = 'sbj'
    chase['PAT'] = 'obj'
    
    sent = "Kim chased Lee"
    tokens = sent.split()
    
    def lex2fs(word):
        for fs in [kim,lee,chase]:
            if fs['ORTH'] == word:
                return fs
                
    subj, verb, obj = lex2fs(tokens[0]), lex2fs(tokens[1]), lex2fs(tokens[2])
    print(subj, verb, obj)
    verb['AGT'] = subj['REF'] # agent of 'chase' is Kim    k
    verb['PAT'] = obj['REF']  # patient of 'chase' is Lee    l
    for k in ['ORTH','REL','AGT','PAT']: # check featstruct of 'chase'
        print("%-5s => %s"%(k,verb[k]))
    
    surprise = {'CAT': 'V', 'ORTH': 'surprised', 'REL': 'surprise', 'SRC': 'sbj', 'EXP': 'obj'}
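    
    The entry for 'surprised' follows the same pattern. As a hedged continuation (not part of the original example), its SRC (source) and EXP (experiencer) roles can be bound to referents just as AGT and PAT were bound for 'chased':
    
    # Hypothetical continuation: bind the SRC and EXP roles of 'surprised'
    # to the referents of the subject and object of "Kim surprised Lee".
    verb2 = surprise
    verb2['SRC'] = kim['REF']   # the source of the surprise is Kim -> 'k'
    verb2['EXP'] = lee['REF']   # the experiencer is Lee -> 'l'
    for k in ['ORTH', 'REL', 'SRC', 'EXP']:
        print("%-5s => %s" % (k, verb2[k]))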
    

    Syntactic Agreement

    The morphological form of the verb co-varies with syntactic properties of the subject noun phrase; this process is called agreement.

    Using Attributes and Constraints

    Informally speaking, linguistic categories have attributes; for example, a noun can have the attribute of being plural.
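    
    As a small sketch (a toy grammar of my own for illustration, not from the original post), the notation can be tried out inline: the variable ?n forces the NUM value of the determiner, noun, and verb to unify, so agreement violations are rejected. The full grammar feat0.fcfg, shown next, uses the same notation on a larger scale.
    
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser
    
    # Miniature illustrative grammar: NUM must unify across the whole sentence.
    toy = FeatureGrammar.fromstring("""
    % start S
    S -> NP[NUM=?n] VP[NUM=?n]
    NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
    VP[NUM=?n] -> IV[NUM=?n]
    Det[NUM=sg] -> 'this'
    Det[NUM=pl] -> 'these'
    N[NUM=sg] -> 'dog'
    N[NUM=pl] -> 'dogs'
    IV[NUM=sg] -> 'barks'
    IV[NUM=pl] -> 'bark'
    """)
    toy_parser = FeatureChartParser(toy)
    print(len(list(toy_parser.parse('this dog barks'.split()))))   # 1 parse
    print(len(list(toy_parser.parse('these dog barks'.split()))))  # 0 parses: NUM clash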

    import nltk
    nltk.data.show_cfg('grammars/book_grammars/feat0.fcfg')
    # % start S
    # # ###################
    # # Grammar Productions
    # # ###################
    # # S expansion productions
    # S -> NP[NUM=?n] VP[NUM=?n]
    # # NP expansion productions
    # NP[NUM=?n] -> N[NUM=?n]
    # NP[NUM=?n] -> PropN[NUM=?n]
    # NP[NUM=?n] -> Det[NUM=?n] N[NUM=?n]
    # NP[NUM=pl] -> N[NUM=pl]
    # # VP expansion productions
    # VP[TENSE=?t, NUM=?n] -> IV[TENSE=?t, NUM=?n]
    # VP[TENSE=?t, NUM=?n] -> TV[TENSE=?t, NUM=?n] NP
    # # ###################
    # # Lexical Productions
    # # ###################
    # Det[NUM=sg] -> 'this' | 'every'
    # Det[NUM=pl] -> 'these' | 'all'
    # Det -> 'the' | 'some' | 'several'
    # PropN[NUM=sg]-> 'Kim' | 'Jody'
    # N[NUM=sg] -> 'dog' | 'girl' | 'car' | 'child'
    # N[NUM=pl] -> 'dogs' | 'girls' | 'cars' | 'children'
    # IV[TENSE=pres,  NUM=sg] -> 'disappears' | 'walks'
    # TV[TENSE=pres, NUM=sg] -> 'sees' | 'likes'
    # IV[TENSE=pres,  NUM=pl] -> 'disappear' | 'walk'
    # TV[TENSE=pres, NUM=pl] -> 'see' | 'like'
    # IV[TENSE=past] -> 'disappeared' | 'walked'
    # TV[TENSE=past] -> 'saw' | 'liked'
    

    Tracing a Feature-Based Chart Parser

    tokens1 = 'Kim likes children'.split()
    from nltk import load_parser
    cp = load_parser('grammars/book_grammars/feat0.fcfg',trace=2)
    trees = cp.parse(tokens1)
    # |.Kim .like.chil.|
    # Leaf Init Rule:
    # |[----]    .    .| [0:1] 'Kim'
    # |.    [----]    .| [1:2] 'likes'
    # |.    .    [----]| [2:3] 'children'
    # Feature Bottom Up Predict Combine Rule:
    # |[----]    .    .| [0:1] PropN[NUM='sg'] -> 'Kim' *
    # Feature Bottom Up Predict Combine Rule:
    # |[----]    .    .| [0:1] NP[NUM='sg'] -> PropN[NUM='sg'] *
    # Feature Bottom Up Predict Combine Rule:
    # |[---->    .    .| [0:1] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'sg'}
    # Feature Bottom Up Predict Combine Rule:
    # |.    [----]    .| [1:2] TV[NUM='sg', TENSE='pres'] -> 'likes' *
    # Feature Bottom Up Predict Combine Rule:
    # |.    [---->    .| [1:2] VP[NUM=?n, TENSE=?t] -> TV[NUM=?n, TENSE=?t] * NP[] {?n: 'sg', ?t: 'pres'}
    # Feature Bottom Up Predict Combine Rule:
    # |.    .    [----]| [2:3] N[NUM='pl'] -> 'children' *
    # Feature Bottom Up Predict Combine Rule:
    # |.    .    [----]| [2:3] NP[NUM='pl'] -> N[NUM='pl'] *
    # Feature Bottom Up Predict Combine Rule:
    # |.    .    [---->| [2:3] S[] -> NP[NUM=?n] * VP[NUM=?n] {?n: 'pl'}
    # Feature Single Edge Fundamental Rule:
    # |.    [---------]| [1:3] VP[NUM='sg', TENSE='pres'] -> TV[NUM='sg', TENSE='pres'] NP[] *
    # Feature Single Edge Fundamental Rule:
    # |[==============]| [0:3] S[] -> NP[NUM='sg'] VP[NUM='sg'] *
    for tree in trees:
        print(tree)
    # (S[]
    #   (NP[NUM='sg'] (PropN[NUM='sg'] Kim))
    #   (VP[NUM='sg', TENSE='pres']
    #     (TV[NUM='sg', TENSE='pres'] likes)
    #     (NP[NUM='pl'] (N[NUM='pl'] children))))
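    
    As a quick hedged check (not in the original post; cp0 is an assumed second parser loaded without tracing), the grammar rejects sentences that violate number agreement:
    
    cp0 = load_parser('grammars/book_grammars/feat0.fcfg')
    print(len(list(cp0.parse('this dog walks'.split()))))    # 1 parse
    print(len(list(cp0.parse('these dog walks'.split()))))   # 0 parses: Det/N NUM clash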
    

    Terminology

    2. Processing Feature Structures

    fs1 = nltk.FeatStruct(TENSE = 'past',NUM = 'sg')
    print(fs1)
    # [ NUM   = 'sg'   ]
    # [ TENSE = 'past' ]
    
    fs2 = nltk.FeatStruct(PER = 3, NUM = 'pl',GND = 'fem')
    print(fs2['GND'])
    # fem
    

    Specifying Feature Structures

    fs3 = nltk.FeatStruct(POS = 'N',AGR = fs1)
    print(fs3)
    # [ AGR = [ NUM   = 'sg'   ] ]
    # [       [ TENSE = 'past' ] ]
    # [                          ]
    # [ POS = 'N'                ]
    
    fs4 = nltk.FeatStruct("[POS='N',AGR = [PER = 3, NUM = 'pl',GND = 'fem']]")
    print(fs4)
    # [       [ GND = 'fem' ] ]
    # [ AGR = [ NUM = 'pl'  ] ]
    # [       [ PER = 3     ] ]
    # [                       ]
    # [ POS = 'N'             ]
    

    A more general feature structure subsumes a less general (more specific) one.

    Merging the information from two feature structures is called unification.

    Feature structures are partially ordered by subsumption:

    [ FS0 subsumes FS1 when FS0 is more general (carries less information) than FS1 ]
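    
    A minimal sketch of subsumption (my own illustration, not from the original post): FS0 subsumes FS1 exactly when unifying FS0 with FS1 yields a structure equal to FS1.
    
    # fs_general carries less information than fs_specific, so it subsumes it.
    fs_general = nltk.FeatStruct(NUM='pl')
    fs_specific = nltk.FeatStruct(NUM='pl', PER=3)
    print(fs_general.unify(fs_specific) == fs_specific)   # True: fs_general subsumes fs_specific
    print(fs_specific.unify(fs_general) == fs_general)    # False: the reverse does not hold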

    fs0 = nltk.FeatStruct("""[NAME = Lee,
                           ADDRESS = (1)[NUMBER = 74,
                                      STREET = 'rue Pascal'],
                           SPOUSE = [NAME = Kim,
                                    ADDRESS->(1)]]""")
    print(fs0)
    fs0_1 = nltk.FeatStruct("[SPOUSE = [ADDRESS = [CITY = Paris]]]")
    print(fs0_1.unify(fs0))
    
    fs0_2 = nltk.FeatStruct("[ADDRESS1 = [NUMBER = 74,STREET = 'rue Pascal']]")
    fs0_3 = nltk.FeatStruct("[ADDRESS1 = ?x, ADDRESS2 = ?x]")  # structure sharing expressed with the variable ?x
    print(fs0_3.unify(fs0_2))
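    
    As a hedged follow-up (not in the original post), the effect of the shared (1) tag in fs0 can be made explicit: the CITY value contributed under SPOUSE's ADDRESS also appears under the top-level ADDRESS, because both paths point at the same shared structure.
    
    shared = fs0_1.unify(fs0)
    print(shared['ADDRESS']['CITY'])            # Paris
    print(shared['SPOUSE']['ADDRESS']['CITY'])  # Paris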
    

    3. Extending a Feature-Based Grammar

    Feature structures let us give concise analyses of a wide range of linguistic phenomena, including verb subcategorization, inversion constructions, unbounded dependency constructions, and case government.

    In this section we look at several such linguistic issues and show the benefits of incorporating features into the grammar.

    Subcategorization

    Heads Revisited

    Auxiliary Verbs and Inversion

    Unbounded Dependency Constructions

    A grammar with productions for inverted clauses and long-distance dependencies, using slash categories:

    nltk.data.show_cfg('grammars/book_grammars/feat1.fcfg')
    # % start S
    # # ###################
    # # Grammar Productions
    # # ###################
    # S[-INV] -> NP VP
    # S[-INV]/?x -> NP VP/?x
    # S[-INV] -> NP S/NP
    # S[-INV] -> Adv[+NEG] S[+INV]
    # S[+INV] -> V[+AUX] NP VP
    # S[+INV]/?x -> V[+AUX] NP VP/?x
    # SBar -> Comp S[-INV]
    # SBar/?x -> Comp S[-INV]/?x
    # VP -> V[SUBCAT=intrans, -AUX]
    # VP -> V[SUBCAT=trans, -AUX] NP
    # VP/?x -> V[SUBCAT=trans, -AUX] NP/?x
    # VP -> V[SUBCAT=clause, -AUX] SBar
    # VP/?x -> V[SUBCAT=clause, -AUX] SBar/?x
    # VP -> V[+AUX] VP
    # VP/?x -> V[+AUX] VP/?x
    # # ###################
    # # Lexical Productions
    # # ###################
    # V[SUBCAT=intrans, -AUX] -> 'walk' | 'sing'
    # V[SUBCAT=trans, -AUX] -> 'see' | 'like'
    # V[SUBCAT=clause, -AUX] -> 'say' | 'claim'
    # V[+AUX] -> 'do' | 'can'
    # NP[-WH] -> 'you' | 'cats'
    # NP[+WH] -> 'who'
    # Adv[+NEG] -> 'rarely' | 'never'
    # NP/NP ->
    # Comp -> 'that'
    
    tokens1 = 'who do you claim that you like'.split()
    from nltk import load_parser
    cp = load_parser('grammars/book_grammars/feat1.fcfg')
    for tree in cp.parse(tokens1):
        print(tree)  # a sentence containing a gap
    # (S[-INV]
    #   (NP[+WH] who)
    #   (S[+INV]/NP[]
    #     (V[+AUX] do)
    #     (NP[-WH] you)
    #     (VP[]/NP[]
    #       (V[-AUX, SUBCAT='clause'] claim)
    #       (SBar[]/NP[]
    #         (Comp[] that)
    #         (S[-INV]/NP[]
    #           (NP[-WH] you)
    #           (VP[]/NP[] (V[-AUX, SUBCAT='trans'] like) (NP[]/NP[] )))))))
    

    A sentence without a gap

    tokens2 = 'you claim that you like cats'.split()
    for tree in cp.parse(tokens2):
        print(tree)
        tree.draw()
        
    # (S[-INV]
    #   (NP[-WH] you)
    #   (VP[]
    #     (V[-AUX, SUBCAT='clause'] claim)
    #     (SBar[]
    #       (Comp[] that)
    #       (S[-INV]
    #         (NP[-WH] you)
    #         (VP[] (V[-AUX, SUBCAT='trans'] like) (NP[-WH] cats))))))
    
    tokens3 = 'rarely do you sing'.split()
    for tree in cp.parse(tokens3):
        print(tree)
        tree.draw()
    # (S[-INV]
    #   (Adv[+NEG] rarely)
    #   (S[+INV]
    #     (V[+AUX] do)
    #     (NP[-WH] you)
    #     (VP[] (V[-AUX, SUBCAT='intrans'] sing))))
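    
    As a final hedged check (not part of the original post), the slash mechanism requires fillers and gaps to pair up, so a filler with no matching gap, or a gap with no filler, receives no parse under feat1.fcfg:
    
    print(len(list(cp.parse('who do you claim that you like cats'.split()))))  # 0 parses: filler 'who' but no gap
    print(len(list(cp.parse('you claim that you like'.split()))))              # 0 parses: gap but no filler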
    
  • Original post: https://www.cnblogs.com/nxf-rabbit75/p/9574142.html