zoukankan      html  css  js  c++  java
  • 进度三

    今天完成了对行业和行业单位进行匹配 以及可视化,主要用到了

    difflib 来进行对数据的操作
    import pymysql
    import difflib
    
    def get_equal(str1, str2):
        data=difflib.SequenceMatcher(None, str1, str2).quick_ratio()
    
        return data
    
    
    def mysql(b,code,id):
        db = pymysql.connect(host='localhost', user='root', passwd='199126', db='kettle', port=3306, charset='utf8')
    
        # 使用cursor()方法获取操作游标
        cur = db.cursor()
    
        sql = "update keji3_copy1 set hangye = '%s',hangyedaima= %s where id = %s" % (b, code, id)
    
        cur.execute(sql)  # 像sql语句传递参数
        # 提交
        db.commit()
    
    
    def name (hynane,id):
        db1 = pymysql.connect(host='localhost', user='root', passwd='199126', db='kettle', port=3306, charset='utf8')
        cursor = db1.cursor()
    
        sql = "SELECT name,daima FROM data"
        cursor.execute(sql)
    
        datalist = []
    
        codelist = []
        alldata = cursor.fetchall()
        for s in alldata:
            datalist.append(s[0])
            codelist.append(s[1])
        #print(datalist)
        a=0
        b=''
        j=0
        code=''
        for i in datalist:
            j=j+1
            list=get_equal(i,hynane)
            if(list>a):
                a=list
                b=i
                code=codelist[j]
        print(b,code,id)
    
        return b,code,id
    
    
    db = pymysql.connect(host='localhost', user='root', passwd='199126', db='kettle', port=3306, charset='utf8')
    cursor = db.cursor()
    sql1 = "SELECT jianjie,id FROM keji3_copy1 where name is not null and hangye is not null and industry is not null"
    
    cursor.execute(sql1)
    alldata = cursor.fetchall()
    for s in alldata:
        hynane=s[0]
        id=s[1]
        print(hynane,id)
        data,data1,data2=name(hynane,id)
        mysql(data,data1,data2)
    View Code

    之前的关键词匹配

    用到了jieba库

    # encoding: utf-8
    import jieba
    import jieba.posseg as pseg #词性标注
    import jieba.analyse as anls #关键词提取
    import pymysql
    def guanjian(s):
        y=''
        for x, w in anls.extract_tags(s, topK=6,withWeight=True):
            y=y+' '+x
        return y
    
    def mysql():
        # 使用这个函数例如传入一个地址 网安大厦,当然传入的地址也不能太模糊了
    
        # 打开数据库连接
        db = pymysql.connect(host='localhost', user='root', passwd='199126', db='kettle', port=3306, charset='utf8')
        db1 = pymysql.connect(host='localhost', user='root', passwd='199126', db='kettle', port=3306, charset='utf8')
        # 使用 cursor() 方法创建一个游标对象 cursor
        cursor = db.cursor()
        cursor1 = db1.cursor()
        # SQL 查询语句
        sql = "SELECT * FROM keji3 where guanjianci is  null"
    
        try:  # 正确
            # 执行SQL语句
            cursor.execute(sql)
            # 获取所有记录列表
            # db.commit() 这是在增删改的情况下 下这条语句是提交到数据库
            results = cursor.fetchall()
            i = 0
            for row in results:
                jianjie = row[10].strip(' ')
                guanjianci=row[9]
                i+=1
                id=row[0]
                guan=guanjian(jianjie)
    
    
                try:
                    sql1 = "UPDATE keji3 SET guanjianci ='%s' where id = %s" % (guan, id)
    
                    cursor1.execute(sql1)
                    # 提交到数据库执行
                    db1.commit()
                except:
                    print("2")
    
                    # 打印结果
                print(guan, id)
    
    
    
        except:  # 发生错误
            print("1")
    
        # 关闭数据库连接
        db.close()
    mysql()
    View Code

    规范化地域和行政代码用了百度地图接口

    import requests
    import json
    import pymysql
    '''
    函数功能:根据传入的比较模糊的地址获取精确的结构化地址
    函数实现流程:利用百度地图api的正地理编码可以获取该位置的经纬度
               接着使用经纬度采用逆地址编码获取结构化地址
    '''
    
    
    def get_formatted_address(address):
        # 根据百度地图api接口获取正地址编码也就是经纬度
        url1 = 'http://api.map.baidu.com/geocoding/v3/?address=' + address + '&output=json&ak=LXkcInigDnERPxfjbGZH2hKm7kr0uO5U&callback=showLocation'
    
        # 获取经纬度
        resp1 = requests.get(url1)
        resp1_str = resp1.text
        resp1_str = resp1_str.replace('showLocation&&showLocation', '')
        resp1_str = resp1_str[1:-1]
        resp1_json = json.loads(resp1_str)
        location = resp1_json.get('result').get('location')
    
    
        # 根据经纬度获取结构化地址
        lng = location.get('lng')
        lat = location.get('lat')
        url2 = 'http://api.map.baidu.com/reverse_geocoding/v3/?ak=LXkcInigDnERPxfjbGZH2hKm7kr0uO5U&output=json&coordtype=wgs84ll&location=' + str(
            lat) + ',' + str(lng) + ''
        resp2 = requests.get(url2)
    
        resp2_json = json.loads(resp2.text)
        # 提取结构化地址
    
        province = resp2_json.get('result').get('addressComponent').get('province')
        city=resp2_json.get('result').get('addressComponent').get('city')
        district = resp2_json.get('result').get('addressComponent').get('district')
        if(province==city):
            address=city+district
        if(province!=city):
            address=province+city+district
    
    
        gj = resp2_json.get('result').get('addressComponent').get('adcode')
        return address,gj
    
    
    def mysql():
        # 使用这个函数例如传入一个地址 网安大厦,当然传入的地址也不能太模糊了
    
        # 打开数据库连接
        db = pymysql.connect(host='localhost', user='root', passwd='199126', db='kettle', port=3306, charset='utf8')
        db1 = pymysql.connect(host='localhost', user='root', passwd='199126', db='kettle', port=3306, charset='utf8')
        # 使用 cursor() 方法创建一个游标对象 cursor
        cursor = db.cursor()
        cursor1 = db1.cursor()
        # SQL 查询语句
        sql = "SELECT * FROM keji3_copy1 where id>1591  "
    
        try:  # 正确
            # 执行SQL语句
            cursor.execute(sql)
            # 获取所有记录列表
            # db.commit() 这是在增删改的情况下 下这条语句是提交到数据库
            results = cursor.fetchall()
            i = 0
            for row in results:
                danwei = row[13].split(' ')[0]
                city=row[5]
                danwei1=city+''+danwei
                id=row[0]
    
                address,gj = get_formatted_address(danwei1)
                try:
                    sql1 = "UPDATE keji3_copy1 SET guifandiyu ='%s',xzbm=%s where id = %s"% (address, gj, id)
    
    
                    cursor1.execute(sql1)
                        # 提交到数据库执行
                    db1.commit()
                except:
                    print("2")
    
                # 打印结果
                print(address,gj,id)
    
        except:  # 发生错误
            print("1")
    
        # 关闭数据库连接
        db.close()
    
    mysql()
    
    
    
    '''
    
    address,gj = get_formatted_address('天津市大港油田滩海勘探开发公司')
    print(address,gj)
    '''
    View Code
  • 相关阅读:
    Jenkins
    python爬虫
    git分布式版本控制
    CKA考试认证总结
    gitlab-ci 工具链
    gitlab-ci 模板库实践
    gitlab-cicd
    gitlab配置文件gitlab.rb详解
    局部变量表中的slot简述
    JVM系统线程类型
  • 原文地址:https://www.cnblogs.com/zlj843767688/p/12496348.html
Copyright © 2011-2022 走看看