zoukankan      html  css  js  c++  java
  • 西游记关键字提取和语句分词

    #!/usr/bin/env python
    # -*- coding:utf-8 -*-
    # NOTE: the triple-quoted string below is never executed. It only preserves
    # earlier scratch experiments: slicing a string after the first comma found
    # with str.find, splitting on commas with str.split, and matching a
    # compiled regular expression with re.match.
    """
    str1 = 'as,gh,rt,ujrk'
    str2 = ','
    str1 = str1[str1.find(str2)+1:]
    print(str1)
    s='as,gh,rt,ujrk'
    print(s.split(','))
    import re
    pattern = re.compile(r'hello.*!')
    match = pattern.match('hello,aklhgslhgfhg!gfdh')
    if match:
        print(match.group())
    """
    import jieba
    import time
    import sys
    import jieba.analyse as analyse
    
    # Print the 20 top-ranked keywords of the whole novel using
    # jieba.analyse.extract_tags. The text file is decoded as GB18030; the
    # context manager closes the file handle as soon as the text is read
    # instead of leaving it open for the rest of the script.
    with open(u'西游记.txt', encoding='gb18030') as novel_file:
        lines = novel_file.read()
    # withWeight=False returns only the words (no scores); the empty allowPOS
    # tuple means no part-of-speech filter is requested.
    print(' '.join(analyse.extract_tags(lines, topK=20, withWeight=False,
                                        allowPOS=())))
    # Disabled experiment, kept as an inert string literal (it is never
    # executed): it times jieba.cut after jieba.enable_parallel(4). The note
    # inside the string says parallel mode only supports POSIX systems.
    """
    jieba.enable_parallel(4)  #并行模式只支持POSIX系统
    content = open(u'西游记.txt',"r").read()
    t1 = time.time()
    words = "/".join(jieba.cut(content))
    t2 = time.time()
    tm_cost = t2-t1
    print('并行速度为:%s bytes/second'% (len(content)/tm_cost))
    """
    # Benchmark single-process segmentation of the whole novel.
    jieba.disable_parallel()
    # Undecodable bytes are skipped (errors='ignore'); the context manager
    # closes the file handle once the text has been read.
    with open(u'西游记.txt', "r", encoding='gb18030', errors='ignore') as novel_file:
        content = novel_file.read()
    # perf_counter is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments.
    t1 = time.perf_counter()
    # The join consumes the whole result of jieba.cut inside the timed region.
    words = "/".join(jieba.cut(content))
    t2 = time.perf_counter()
    tm_cost = t2 - t1
    # NOTE(review): len(content) counts decoded characters, not bytes, so the
    # printed figure is really characters per second despite the label.
    print('非并行速度为:%s bytes/second'% (len(content)/tm_cost))
    
    # Segment a short sentence with cut_all=False. The result is bound to a
    # descriptive name rather than `list`, which would shadow the builtin for
    # the remainder of the script.
    segments = jieba.cut('我在学习自然语言处理', cut_all=False)
    # Prints the object returned by jieba.cut itself, not the tokens.
    print(segments)
    # Joining iterates the result and shows the actual tokens.
    print('/'.join(segments))
    # Same sentence segmented before and after suggest_freq, with HMM=False,
    # to show the effect of asking jieba to split '中' and '将' apart.
    print("/".join(jieba.cut('如果放到旧字典中将出错', HMM=False)))
    jieba.suggest_freq(('中', '将'), True)
    print('/'.join(jieba.cut('如果放到旧字典中将出错', HMM=False)))
    
    # Print the 20 top-ranked keywords of the novel using
    # jieba.analyse.textrank, restricted to the part-of-speech tags
    # 'ns', 'n', 'v' and 'vn'. The context manager closes the file handle
    # right after reading instead of leaking it until interpreter exit.
    with open('西游记.txt', encoding='gb18030') as novel_file:
        line = novel_file.read()
    print(" ".join(analyse.textrank(line, topK=20, withWeight=False,
                                    allowPOS=('ns', 'n', 'v', 'vn'))))
    

      

  • 相关阅读:
    Vue 实时过去 页面宽高
    css 全网页呈现灰色
    Vue 省市区三级联动(基于ElementUI)
    小程序 节流函数 (防止按钮重复点击)
    Vs Code 自动编译TS
    VS2019 调试时出现:正试图在 os 加载程序锁内执行托管代码
    SQLServer 日期函数大全
    SQL 日期函数
    SQL Server 查询性能异常语句
    SQL中多表连接delete删除表数据
  • 原文地址:https://www.cnblogs.com/lifengwu/p/10028443.html
Copyright © 2011-2022 走看看