zoukankan      html  css  js  c++  java
  • 关于alzheimer disease论文的统计

    1.获取2016年的所有关键字,保存到keyword_2016.json中

    import pymysql
    import json
    
    # Connect to the local MySQL database that holds the `alzheimer` table.
    conn = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='',
        db='python',
    )
    try:
        cursor = conn.cursor()

        # Select the keyword string and PMC id of every 2016 row whose keyword
        # string is non-empty.  Standard `AND` replaces the MySQL-only `&&`.
        sql = "SELECT union_kwd_str,pmc_id FROM alzheimer where pub_year = '2016' AND union_kwd_str != '' "
        a = cursor.execute(sql)  # used below as the number of rows to fetch
        print(a)
        b = cursor.fetchmany(a)  # the original author noted 7887 rows here
    finally:
        # Release the connection even if the query fails.
        conn.close()

    # Column 0 is the keyword string; column 1 is the PMC id, keyed by row index.
    abstract_list = [row[0] for row in b]
    pmc_id_dict = {j: row[1] for j, row in enumerate(b)}
    
    
    
    def output_to_json(data,filename):
        """Write `data` to `filename` as JSON and return the JSON text.

        The file is opened in 'w' mode, so existing content is overwritten.
        """
        # The with-statement closes the handle when the block exits.
        with open(filename, 'w') as handle:
            payload = json.dumps(data)
            handle.write(payload)
        return payload
    
    # Bundle the publication year, the row count and the keyword strings,
    # then write the bundle to keyword_2016.json.
    output_data = {'pub_year': "2016", 'count': a, 'keyword': abstract_list}
    output_to_json(output_data, 'keyword_2016.json')

    从keyword_2016.json中读取关键词,并统计选出前25的关键词

    import re  
    import collections  
    import json
    
    def input_from_json(filename):
        """Open `filename` for reading and return its parsed JSON content."""
        # Returning from inside the with-block still closes the handle.
        with open(filename, 'r') as handle:
            return json.load(handle)
    
    def count_word(path):
        """Count comma-separated tokens in the 'keyword' list stored at `path`.

        The JSON file is loaded with input_from_json().  Each string in its
        'keyword' list is split on ',' and every resulting token is tallied
        exactly as split (no stripping or case folding).

        Returns a dict mapping token -> number of occurrences.
        """
        result = {}
        for entry in input_from_json(path)['keyword']:
            for token in entry.split(','):
                result[token] = result.get(token, 0) + 1
        return result
      
          
    
     
    def sort_by_count(d):
        """Return an OrderedDict of d's items ordered by descending value."""
        def negated_count(pair):
            # Sorting ascending on the negated value puts the largest first.
            return -pair[1]

        ranked = list(d.items())
        ranked.sort(key=negated_count)
        return collections.OrderedDict(ranked)
    
     
    if __name__ == '__main__':
        # Rank the keywords stored in keyword_2016.json by frequency and write
        # the 25 most frequent ones to sort_keyword_2016.json.
        file_name = "keyword_2016.json"
        top_n = 25

        dword = sort_by_count(count_word(file_name))

        entries = []
        for key, value in list(dword.items())[:top_n]:
            # Underscores in the keyword are replaced with spaces before output.
            entries.append(json.dumps({
                'name': key.replace("_", " "),
                'value': value,
            }))

        # The output is a comma-separated run of JSON objects without enclosing
        # brackets.  Joining avoids the trailing comma the old loop left behind
        # when fewer than 25 keywords existed, and the with-statement closes the
        # file, which the old code never did.
        with open('sort_keyword_2016.json', 'w') as fobj2:
            fobj2.write(','.join(entries))
            
    

      

    2.获取发表论文量排名前十的国家

    1)把所有第一作者的信息保存到authorinfor.json中

    import pymysql
    import json
    
    # Connect to the local MySQL database that holds the `alzheimer` table.
    conn = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='',
        db='python',
    )
    try:
        cursor = conn.cursor()

        # Select the author information and PMC id of every row whose
        # authorinfor column is non-empty (no year filter is applied).
        sql = "SELECT authorinfor,pmc_id FROM alzheimer WHERE authorinfor != ''"
        a = cursor.execute(sql)  # used below as the number of rows to fetch
        print(a)
        b = cursor.fetchmany(a)  # rows of (authorinfor, pmc_id)
    finally:
        # Release the connection even if the query fails.
        conn.close()

    # Column 0 is the author information; column 1 is the PMC id, keyed by row index.
    authorinfor_list = [row[0] for row in b]
    pmc_id_dict = {j: row[1] for j, row in enumerate(b)}
    
    def output_to_json(data,filename):
        """Dump `data` to `filename` as JSON text and return that same text.

        Any existing file at `filename` is overwritten ('w' mode).
        """
        with open(filename, 'w') as out:
            serialized = json.dumps(data)
            out.write(serialized)
        # Leaving the with-block has already closed the file.
        return serialized
    
    # Bundle the row count, the author-information strings and the PMC ids,
    # then write the bundle to authorinfor.json.
    # NOTE(review): 'pub_year' is hard-coded to "2016" although the SELECT
    # that produced these rows has no year filter — confirm this is intended.
    output_data = {'pub_year': "2016", 'count': a,
                   'authorinfor': authorinfor_list, 'pmc_id': pmc_id_dict}
    output_to_json(output_data, 'authorinfor.json')

    2)选出排名前十的国家

    import re  
    import collections  
    import json
    
    def input_from_json(filename):
        """Return the JSON document stored in `filename`, parsed into Python objects."""
        with open(filename, 'r') as source:
            parsed = json.load(source)
        # The file is closed once the with-block has been left.
        return parsed
    
    def count_word(path):
        """Tally author-information entries by their trailing country field.

        The JSON file at `path` is loaded with input_from_json().  For every
        string in its 'authorinfor' list, the text after the last comma is
        taken as the country name (presumably the affiliation's country —
        verify against the data), with periods, newlines and surrounding
        whitespace removed.

        Returns a dict mapping country name -> number of entries.
        """
        result = {}
        authorinfor_list = input_from_json(path)['authorinfor']
        for all_the_text in authorinfor_list:
            country = all_the_text.split(',')[-1]
            # Remove literal periods and newlines.  The previous
            # re.sub(".", "", ...) treated "." as the regex wildcard and erased
            # the whole name, so every entry was counted under ''.
            country = country.replace(".", "").replace("\n", "").strip()
            # The name stays a text string (no .encode('utf-8')): bytes keys
            # cannot be serialized by json.dumps on Python 3.
            result[country] = result.get(country, 0) + 1
        return result
          
     
    def sort_by_count(d):
        """Build an OrderedDict from `d` with the highest values first."""
        pairs = list(d.items())
        # Ascending sort on the negated value yields descending order.
        pairs.sort(key=lambda pair: -pair[1])
        ordered = collections.OrderedDict(pairs)
        return ordered
    
     
    if __name__ == '__main__':
        # Rank the countries found in authorinfor.json by number of entries,
        # dump the leading ones to sort_country.json and print the top of the list.
        file_name = "authorinfor.json"

        dword = sort_by_count(count_word(file_name))
        ranked = list(dword.items())

        # One JSON object per line for the first 49 entries (the same cut-off
        # as the original `num < 50` check).  The with-statement closes the
        # file, which the old code never did.
        with open('sort_country.json', 'w') as fobj2:
            for country, value in ranked[:49]:
                fobj2.write(json.dumps({'name': country, 'value': value}))
                # Restores the newline literal that was split across two lines.
                fobj2.write('\n')

        # list() makes the keys/values sliceable on Python 3 as well.
        countrylist = list(dword.keys())
        valuelist = list(dword.values())

        # NOTE(review): 11 entries are printed although the heading speaks of
        # the top ten countries — confirm the intended count.
        print(countrylist[:11])
        print(valuelist[:11])
  • 相关阅读:
    springboot实现redis的分布式锁
    剑指offer--二维数组中查找
    剑指offer--二维数组中查找
    对JDK动态代理的模拟实现
    Spring(4)AOP
    设计模式之单例模式(Java)
    【Java并发系列】--Java内存模型
    maven 解决jar包冲突及简单使用
    基于注解的SpringAOP源码解析(三)
    Java代码中可以优化性能的小细节
  • 原文地址:https://www.cnblogs.com/lovely7/p/6178829.html
Copyright © 2011-2022 走看看