zoukankan      html  css  js  c++  java
  • 关于alzheimer disease论文的统计

    1.获取2016年的所有关键字,保存到keyword_2016.json中

    import pymysql
    import json
    
    # Open a connection to the local MySQL database that holds the
    # `alzheimer` table.
    conn = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='',
        db='python',
    )
    try:
        cursor = conn.cursor()
        try:
            # Select the 2016 rows that have a non-empty keyword string.
            # `AND` is used instead of `&&`, a non-standard operator that
            # MySQL has deprecated.
            sql = "SELECT union_kwd_str,pmc_id FROM alzheimer where pub_year = '2016' AND union_kwd_str != '' "
            a = cursor.execute(sql)  # row count as reported by execute()
            print(a)
            b = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

    # Column 0 is the keyword string, column 1 the pmc_id; pmc_id_dict maps
    # each row's position in the result set to its pmc_id.
    abstract_list = [row[0] for row in b]
    pmc_id_dict = {j: row[1] for j, row in enumerate(b)}
    
    
    
    def output_to_json(data,filename):
        """Write *data* to *filename* as JSON and return the JSON text.

        The data is serialized once, before the file is opened, so a
        serialization error does not leave a truncated file behind.

        Args:
            data: Any object that ``json.dumps`` can serialize.
            filename: Path of the file to (over)write.

        Returns:
            The JSON string that was written.
        """
        text = json.dumps(data)
        with open(filename, 'w') as out:
            out.write(text)
        return text
    
    # Bundle the query results (year label, row count, keyword strings) and
    # dump them to keyword_2016.json.
    output_data = {}
    output_data['pub_year'] = "2016"
    output_data['count'] = a
    output_data['keyword'] = abstract_list
    output_to_json(output_data, 'keyword_2016.json')

    从keyword_2016.json中读取关键词,并统计选出前25的关键词

    import re  
    import collections  
    import json
    
    def input_from_json(filename):
        """Read *filename* and return its parsed JSON content.

        Args:
            filename: Path of a file containing a single JSON document.

        Returns:
            The decoded Python object.
        """
        # The `with` block closes the file; no explicit close() is needed.
        with open(filename, 'r') as src:
            return json.load(src)
    
    def count_word(path):
        """Tally how often each keyword occurs in the JSON file at *path*.

        The file's 'keyword' entry is a list of comma-separated keyword
        strings; each piece between commas is counted as-is.

        Returns:
            A dict mapping keyword -> number of occurrences.
        """
        tally = {}
        for keyword_str in input_from_json(path)['keyword']:
            for term in keyword_str.split(','):
                tally[term] = tally.get(term, 0) + 1
        return tally
      
          
    
     
    def sort_by_count(d):
        """Return an OrderedDict of *d*'s items ordered by value, largest first.

        Items with equal values keep the relative order they had in *d*.
        """
        ranked = sorted(d.items(), key=lambda item: item[1], reverse=True)
        return collections.OrderedDict(ranked)
    
     
    if __name__ == '__main__':
        # Rank the keywords in keyword_2016.json by frequency and write the
        # 25 most frequent ones to sort_keyword_2016.json as
        # {"name": ..., "value": ...} objects.
        file_name = "keyword_2016.json"
        top_n = 25

        dword = sort_by_count(count_word(file_name))

        entries = []
        for key, value in list(dword.items())[:top_n]:
            # Underscores in a keyword are shown as spaces in the output name.
            data = {
                'name': key.replace("_", " "),
                'value': value
            }
            entries.append(json.dumps(data))

        # The objects are comma-separated with no enclosing brackets, so the
        # file holds a JSON fragment rather than a complete document. Joining
        # avoids a trailing comma when fewer than `top_n` keywords exist.
        with open('sort_keyword_2016.json', 'w') as fobj2:
            fobj2.write(','.join(entries))
            
    

      

    2.获取发表论文量排名前十的国家

    1)把所有第一作者的信息保存到authorinfor.json中

    import pymysql
    import json
    
    # Open a connection to the local MySQL database that holds the
    # `alzheimer` table.
    conn = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='',
        db='python',
    )
    try:
        cursor = conn.cursor()
        try:
            # Select every row with a non-empty author-info string (all years).
            sql = "SELECT authorinfor,pmc_id FROM alzheimer WHERE authorinfor != ''"
            a = cursor.execute(sql)  # row count as reported by execute()
            print(a)
            b = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

    # Column 0 is the author-info string, column 1 the pmc_id; pmc_id_dict
    # maps each row's position in the result set to its pmc_id.
    authorinfor_list = [row[0] for row in b]
    pmc_id_dict = {j: row[1] for j, row in enumerate(b)}
    
    def output_to_json(data,filename):
        """Write *data* to *filename* as JSON and return the JSON text.

        The data is serialized once, before the file is opened, so a
        serialization error does not leave a truncated file behind.

        Args:
            data: Any object that ``json.dumps`` can serialize.
            filename: Path of the file to (over)write.

        Returns:
            The JSON string that was written.
        """
        text = json.dumps(data)
        with open(filename, 'w') as out:
            out.write(text)
        return text
    
    # Bundle the query results and dump them to authorinfor.json.
    # NOTE(review): the query feeding this data has no year filter, so the
    # "2016" label looks copy-pasted from the keyword script -- confirm.
    output_data = {}
    output_data['pub_year'] = "2016"
    output_data['count'] = a
    output_data['authorinfor'] = authorinfor_list
    output_data['pmc_id'] = pmc_id_dict
    output_to_json(output_data, 'authorinfor.json')

    2)选出排名前十的国家

    import re  
    import collections  
    import json
    
    def input_from_json(filename):
        """Read *filename* and return its parsed JSON content.

        Args:
            filename: Path of a file containing a single JSON document.

        Returns:
            The decoded Python object.
        """
        # The `with` block closes the file; no explicit close() is needed.
        with open(filename, 'r') as src:
            return json.load(src)
    
    def count_word(path):
        """Count author-info entries per country in the JSON file at *path*.

        The country is taken to be the text after the last comma of each
        string in the file's 'authorinfor' list, with periods, newlines and
        surrounding whitespace removed.

        Returns:
            A dict mapping the UTF-8 encoded country name -> number of entries.
        """
        result = {}
        for all_the_text in input_from_json(path)['authorinfor']:
            country = all_the_text.split(',')[-1]
            # Plain str.replace is used on purpose: as a regex, "." is a
            # wildcard, and substituting it would delete every character and
            # collapse all countries into a single empty key.
            country = country.replace(".", "").replace("\n", "")
            # Drop surrounding blanks so " USA" and "USA" share one key.
            country = country.strip()
            # NOTE(review): keys are UTF-8 byte strings, as Python 2 callers
            # expect; under Python 3 these would be bytes, which json.dumps
            # cannot serialize -- drop the encode when porting.
            country = country.encode('utf-8')
            result[country] = result.get(country, 0) + 1
        return result
          
     
    def sort_by_count(d):
        """Order the items of *d* by descending value.

        Returns a collections.OrderedDict; items with equal values stay in
        the relative order they had in *d*.
        """
        pairs = list(d.items())
        pairs.sort(key=lambda kv: kv[1], reverse=True)
        return collections.OrderedDict(pairs)
    
     
    if __name__ == '__main__':
        # Rank countries by number of author-info entries, write the leading
        # ones to sort_country.json and print the head of the ranking.
        file_name = "authorinfor.json"

        dword = sort_by_count(count_word(file_name))

        # One {"name": ..., "value": ...} object per line for the first 49
        # countries (same cut-off as the former `num < 50` check).
        lines = [
            json.dumps({'name': country, 'value': value}) + '\n'
            for country, value in list(dword.items())[:49]
        ]
        with open('sort_country.json', 'w') as fobj2:
            fobj2.writelines(lines)

        # list() makes the slices below work whether keys()/values() return
        # lists or views.
        countrylist = list(dword.keys())
        valuelist = list(dword.values())

        print(countrylist[:11])
        print(valuelist[:11])
  • 相关阅读:
    Getting started with 3G | ip.access nano3G+OpenBSC+Osmocom-bb Part 1
    Metasploit的射频收发器功能 | Metasploit’s RF Transceiver Capabilities
    分析无线遥控器信号并制作Hack硬件进行攻击
    利用Hackrf One进行GPS定位欺骗制作超级跑马机
    使用OpenBTS基站测试物联网模块安全性
    在cmd中为命令设置别名以及启动cmd时自动执行bat
    Struts2、spring2、hibernate3在SSH中各起什么作用
    switch omega
    html转译字符 字符实体
    excel自定义数据验证
  • 原文地址:https://www.cnblogs.com/lovely7/p/6178829.html
Copyright © 2011-2022 走看看