zoukankan      html  css  js  c++  java
  • hanlp入门 (含标准分词、NLP分词、索引分词、N-最短路径分词、CRF分词、极速词典分词、自定义分词)

    直接给代码

     #-*- coding:utf-8 -*-
     # Walk-through of several HanLP segmenters called from Python via pyhanlp.
     # Every section prints a banner, runs one segmenter on a sample sentence
     # and closes with a 70-character rule.
     from pyhanlp import *

     # Horizontal rule printed at the end of every section.
     _RULE = "-" * 70


     def _banner(title):
         # Print a section header: the title framed by 30 '=' on each side.
         print("=" * 30 + title + "=" * 30)


     # Chinese word segmentation through the default HanLP entry point.
     greeting = '你好,欢迎在Python中调用HanLP的API'
     print(HanLP.segment(greeting))
     print(_RULE)

     _banner("标准分词")
     StandardTokenizer = JClass('com.hankcs.hanlp.tokenizer.StandardTokenizer')
     print(StandardTokenizer.segment(greeting))
     print(_RULE)

     # NLP segmentation: NLPTokenizer performs full named-entity recognition
     # and part-of-speech tagging.
     _banner("NLP分词")
     NLPTokenizer = JClass('com.hankcs.hanlp.tokenizer.NLPTokenizer')
     print(NLPTokenizer.segment('中国科学院计算技术研究所的宗成庆教授正在教授自然语言处理课程'))
     print(_RULE)

     _banner("索引分词")
     IndexTokenizer = JClass('com.hankcs.hanlp.tokenizer.IndexTokenizer')
     termList = IndexTokenizer.segment("主副食品")
     for term in termList:
         # Show each term followed by its [start:end] span, where end is the
         # term's offset plus the length of its word.
         print("{} [{}:{}]".format(term, term.offset, term.offset + len(term.word)))
     print(_RULE)

     _banner(" N-最短路径分词")
     # NOTE(review): no N-shortest-path segmenter is invoked in this section.
     # The disabled snippet below references CRFSegment, so it looks like it
     # belongs to the CRF section instead -- confirm before enabling it.
     # CRFSegment = JClass('com.hankcs.hanlp.seg.CRF.CRFSegment')
     # segment=CRFSegment()
     # testCase ="今天,刘志军案的关键人物,山西女商人丁书苗在市二中院出庭受审。"
     # print(segment.seg("你看过穆赫兰道吗"))
     print(_RULE)

     _banner(" CRF分词")
     # TODO: this section only prints its banner; no CRF example is run yet.
     print(_RULE)

     _banner(" 极速词典分词")
     SpeedTokenizer = JClass('com.hankcs.hanlp.tokenizer.SpeedTokenizer')
     # Segment with SpeedTokenizer itself; calling NLPTokenizer here would
     # demonstrate the wrong segmenter and leave SpeedTokenizer unused.
     print(SpeedTokenizer.segment('江西鄱阳湖干枯,中国最大淡水湖变成大草原'))
     print(_RULE)

     _banner(" 自定义分词")
     CustomDictionary = JClass('com.hankcs.hanlp.dictionary.CustomDictionary')
     # Add two words to the custom dictionary before segmenting the sentence
     # that contains them.
     CustomDictionary.add('攻城狮')
     CustomDictionary.add('单身狗')
     # HanLP is already in scope (it is used at the top of the script); this
     # line re-resolves the same Java class through JClass.
     HanLP = JClass('com.hankcs.hanlp.HanLP')
     print(HanLP.segment('攻城狮逆袭单身狗,迎娶白富美,走上人生巅峰'))
     print(_RULE)
  • 相关阅读:
    Java基础知识➣面向对象(八)
    Linux(CentOS7)安装Tomcat
    Java基础知识➣发送Emai和访问MySQL数据库(七)
    Java基础知识➣网络Socket(六)
    JS 的点点滴滴
    git 快速入门(二)
    zxing 生成二维码
    js生成二维码
    Markdown简介
    java常用string inputStream转换
  • 原文地址:https://www.cnblogs.com/smartisn/p/13822711.html
Copyright © 2011-2022 走看看