zoukankan      html  css  js  c++  java
  • 【parser】stanfordparser demo使用

    测试站点:

    http://nlp.stanford.edu:8080/parser/index.jsp

    先贴点代码,是stanford-parser的demo:

    import java.util.Collection;
    import java.util.List;
    import java.io.StringReader;
    
    import edu.stanford.nlp.process.TokenizerFactory;
    import edu.stanford.nlp.process.CoreLabelTokenFactory;
    import edu.stanford.nlp.process.DocumentPreprocessor;
    import edu.stanford.nlp.process.PTBTokenizer;
    import edu.stanford.nlp.ling.CoreLabel;
    import edu.stanford.nlp.ling.HasWord;
    import edu.stanford.nlp.ling.Sentence;
    import edu.stanford.nlp.trees.*;
    import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
    
    class ParserDemo {
    
      /** Path of the serialized English PCFG grammar that {@link #main} loads. */
      private static final String ENGLISH_PCFG_MODEL =
          "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    
      /**
       * Entry point: loads the parser model and runs one of the two demos.
       *
       * <p>The model is loaded with {@code LexicalizedParser.loadModel}; the
       * path can either be a file or any resource in the classpath.  Here it
       * points into the models jar file, which therefore has to be on the
       * classpath for ParserDemo to work.
       *
       * @param args if at least one argument is given, {@code args[0]} is
       *             passed to {@link #demoDP} as the name of the file to parse;
       *             with no arguments {@link #demoAPI} is run instead
       */
      public static void main(String[] args) {
        LexicalizedParser lp = LexicalizedParser.loadModel(ENGLISH_PCFG_MODEL);
        if (args.length > 0) {
          demoDP(lp, args[0]);
        } else {
          demoAPI(lp);
        }
      }
    
      /**
       * Turns a file into sentences of tokens and prints, for each sentence,
       * its parse tree followed by its CC-processed typed dependencies.
       *
       * <p>Trees are printed by calling pennPrint on the Tree object.  It is
       * also possible to pass a PrintWriter to pennPrint if you want to
       * capture the output.
       *
       * @param lp       the already loaded parser applied to every sentence
       * @param filename name of the file handed to DocumentPreprocessor
       */
      public static void demoDP(LexicalizedParser lp, String filename) {
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    
        // DocumentPreprocessor loads, sentence-segments and tokenizes the file.
        // You could also create a tokenizer here (as in demoAPI) and pass it
        // to DocumentPreprocessor.
        for (List<HasWord> sentence : new DocumentPreprocessor(filename)) {
          Tree parse = lp.apply(sentence);
          parse.pennPrint();
          System.out.println();
    
          GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
          // Parameterized (was a raw Collection) so the element type is
          // checked by the compiler, matching the typed usage in demoAPI.
          Collection<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
          System.out.println(tdl);
          System.out.println();
        }
      }
    
      /**
       * Demonstrates other ways of calling the parser: with already tokenized
       * text, and with raw text that is tokenized as a single sentence by an
       * explicit tokenizer.
       *
       * <p>The first parse is printed with pennPrint.  For the second parse
       * the CC-processed typed dependencies are printed, and then the tree is
       * printed through a TreePrint object; the options used when creating
       * the TreePrint determine what results are printed.  Once again, one
       * can capture the output by passing a PrintWriter to
       * TreePrint.printTree.
       *
       * @param lp the already loaded parser
       */
      public static void demoAPI(LexicalizedParser lp) {
        // Option 1: parse a list of correctly tokenized words.
        String[] sent = { "This", "is", "an", "easy", "sentence", "." };
        List<CoreLabel> rawWords = Sentence.toCoreLabelList(sent);
        Tree parse = lp.apply(rawWords);
        parse.pennPrint();
        System.out.println();
    
        // Option 2: tokenize raw text with an explicit tokenizer, then parse.
        String sent2 = "This is another sentence.";
        TokenizerFactory<CoreLabel> tokenizerFactory =
          PTBTokenizer.factory(new CoreLabelTokenFactory(), "");
        List<CoreLabel> rawWords2 =
          tokenizerFactory.getTokenizer(new StringReader(sent2)).tokenize();
        parse = lp.apply(rawWords2);
    
        // Print the typed dependencies of the second parse.
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
        GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
        List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
        System.out.println(tdl);
        System.out.println();
    
        // Print the second parse again through TreePrint, using the
        // "penn" and "typedDependenciesCollapsed" output formats.
        TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");
        tp.printTree(parse);
      }
    
      private ParserDemo() {} // static methods only
    
    }

    结果:

    Your query
    猴子喜欢吃香蕉。
    Segmentation
    猴子
    喜欢
    吃
    香蕉
    。
    Tagging
    猴子/NR
    喜欢/VV
    吃/VV
    香蕉/NN
    。/PU
    Parse
    
    (ROOT
      (IP
        (NP (NR 猴子))
        (VP (VV 喜欢)
          (IP
            (VP (VV 吃)
              (NP (NN 香蕉)))))
        (PU 。)))
    
    Typed dependencies
    
    nsubj(喜欢-2, 猴子-1)
    root(ROOT-0, 喜欢-2)
    ccomp(喜欢-2, 吃-3)
    dobj(吃-3, 香蕉-4)
    
    Typed dependencies, collapsed
    
    nsubj(喜欢-2, 猴子-1)
    root(ROOT-0, 喜欢-2)
    ccomp(喜欢-2, 吃-3)
    dobj(吃-3, 香蕉-4)
  • 相关阅读:
    使用beautiful soup解析xml
    mongodb下载以及连接
    beautiful soup解析有空格的class
    爬取糗事百科的热门段子,以及热图链接
    结果记录
    安装自然语言处理工具Nltk以及初次使用
    AD文献分析 整体框架和数据设计
    遍历目录,目录下文件名存入文件
    dict,列表方法
    工具集
  • 原文地址:https://www.cnblogs.com/549294286/p/3067534.html
Copyright © 2011-2022 走看看