zoukankan      html  css  js  c++  java
  • python jieba分词

    #!/usr/bin/python
    # -*- coding: UTF-8 -*-
    import jieba
    import jieba.analyse
    import pymysql
    
    # Tag every row of the `shoes` table: segment the product name with jieba,
    # keep only the tokens that appear in the tag library, normalise a few
    # partial tokens to their full tag, and store the result in `shoes.tag` as a
    # '|'-terminated list (e.g. "Jimmy Choo|女鞋|").
    #
    # First primary key to process; rows are visited one at a time up to LAST_ID.
    # Named shoe_id rather than id so the builtin id() is not shadowed.
    shoe_id = 1
    LAST_ID = 100000
    # Tokens that are only a fragment or a variant of the real tag, mapped to the
    # tag that is stored instead. The '' entry maps to '男鞋' because that is the
    # branch the original if/elif chain reached first for an empty token.
    TAG_ALIASES = {
        'Massimo': 'Massimo Dutti',
        'WHAT': 'WHAT FOR',
        '': '男鞋',
        '男款': '男鞋',
        '男鞋': '男鞋',
        '女款': '女鞋',
        '女鞋': '女鞋',
        'Kiss': 'KissKitty',
        'URBAN': 'URBAN REVIVO',
        'Jimmy': 'Jimmy Choo',
        'Inking': 'Inking Pot',
        'Miss': 'Miss Sixty',
        'Martens': 'Dr.Martens',
    }
    # Load the tag library: the first space-separated field of every line is a
    # tag. A set gives O(1) membership tests in the per-token filter below, and
    # the with-block guarantees the file handle is closed.
    # NOTE(review): the path below looks like it lost its backslashes
    # (perhaps 'D:\\spider\\shoes.txt') - confirm before running.
    with open('D:spidershoes.txt', 'r', encoding='utf-8') as f:
        tag_ku = {tag_line.split(' ')[0] for tag_line in f.read().splitlines()}
    # Point jieba at the custom dictionary.
    # NOTE(review): set_dictionary replaces jieba's main dictionary; if the
    # intent is to add words on top of the default one, jieba.load_userdict is
    # the usual call - confirm.
    jieba.set_dictionary('./shoes.txt')
    # Parameterised statements: the driver escapes the values, so a tag that
    # contains a quote character can no longer break the UPDATE.
    select_sql = "select shoes_name from shoes where id = %s"
    update_sql = "update shoes set tag = %s where id = %s"
    # Connect to the database.
    coon = pymysql.connect(user='root', password='root', host='127.0.0.1', port=3306, database='bishe_shoes',use_unicode=True, charset="utf8")
    try:
        cursor = coon.cursor()
        try:
            while shoe_id <= LAST_ID:
                print(shoe_id)
                # Read the product name for this id.
                cursor.execute(select_sql, (shoe_id,))
                row = cursor.fetchone()
                # A gap in the id sequence (or a NULL name) must not abort the
                # whole run; skip it and move on to the next id.
                if row is None or row[0] is None:
                    shoe_id = shoe_id + 1
                    continue
                shoes_name = row[0]
                print(shoes_name)
                # Precise-mode segmentation using the custom dictionary.
                result = list(jieba.cut(shoes_name, cut_all=False))
                print(result)
                # Keep only tokens that are known tags, replacing partial
                # tokens with their full tag; every tag is followed by '|'.
                shoes_ku = ''.join(
                    TAG_ALIASES.get(each, each) + '|'
                    for each in result
                    if each in tag_ku
                )
                print(shoes_ku)
                # Store the generated tags for this product.
                print(update_sql, (shoes_ku, shoe_id))
                cursor.execute(update_sql, (shoes_ku, shoe_id))
                # Commit per row so progress survives a later failure.
                coon.commit()
                shoe_id = shoe_id + 1
        finally:
            cursor.close()
    finally:
        # Release the connection even if the loop raised.
        coon.close()

    运行结果:

  • 相关阅读:
    2019计蒜之道初赛第三场题解
    牛客小白月赛14 :部分题目总结
    CF-558:部分题目总结
    浙江省第十六届大学生ACM程序设计竞赛部分题解
    浙江省高职院校联合训练(一)
    CF-544:部分题目总结
    CF-552E-Two Teams
    CF-551:部分题目总结
    freemarker使用map替换字符串中的值
    freemarker使用map替换ftl中相关值
  • 原文地址:https://www.cnblogs.com/qilin20/p/12284570.html
Copyright © 2011-2022 走看看