zoukankan      html  css  js  c++  java
  • 关于alzheimer disease论文的统计

    1.获取2016年的所有关键字,保存到keyword_2016.json中

    import pymysql
    import json
    
    # Fetch every 2016 record that has a non-empty keyword string.
    conn = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='',
        db='python',
    )
    try:
        cursor = conn.cursor()

        sql = "SELECT union_kwd_str,pmc_id FROM alzheimer where pub_year = '2016' && union_kwd_str != '' "
        # `a` is the value returned by execute(); it is used as the row count
        # here and is stored as 'count' in the exported JSON further down.
        a = cursor.execute(sql)
        print(a)
        # b holds the fetched (union_kwd_str, pmc_id) rows
        # (7887 rows according to the author's original note).
        b = cursor.fetchmany(a)
    finally:
        # Release the connection even when the query fails; the original
        # script never closed it.
        conn.close()

    # Keyword strings in row order, plus a row-index -> pmc_id lookup.
    abstract_list = [row[0] for row in b]
    pmc_id_dict = {index: row[1] for index, row in enumerate(b)}
    
    
    
    def output_to_json(data, filename):
        """Write ``data`` to ``filename`` as JSON and return the JSON text.

        Args:
            data: Any object accepted by ``json.dumps``.
            filename: Path of the file to create or overwrite.

        Returns:
            The JSON string that was written to the file.
        """
        # Serialize once and reuse the text for both the file and the return
        # value (the original called json.dumps twice).
        serialized = json.dumps(data)
        # The with-statement closes the file; no explicit close() is needed.
        with open(filename, 'w') as out_file:
            out_file.write(serialized)
        return serialized
    
    # Assemble the export payload field by field, then write it to disk.
    output_data = {}
    output_data['pub_year'] = "2016"
    output_data['count'] = a                 # value returned by cursor.execute()
    output_data['keyword'] = abstract_list   # one keyword string per fetched row
    output_to_json(output_data, 'keyword_2016.json')

    从keyword_2016.json中读取关键词,并统计选出前25的关键词

    import re  
    import collections  
    import json
    
    def input_from_json(filename):
        """Read the file at ``filename`` and return its parsed JSON content.

        Args:
            filename: Path of a JSON file.

        Returns:
            The decoded Python object (dict, list, ...).
        """
        # The with-statement closes the file; the original's explicit close()
        # inside the block was redundant, and its variable name shadowed the
        # Python 2 builtin ``file``.
        with open(filename, 'r') as in_file:
            return json.load(in_file)
    
    def count_word(path):
        """Tally keyword occurrences from the JSON file at ``path``.

        The file is loaded with ``input_from_json``; each string in its
        'keyword' list is split on ',' and every resulting piece is counted
        exactly as it appears (no trimming or case folding).

        Returns:
            A dict mapping each keyword to the number of times it occurred.
        """
        tally = {}
        for keyword_str in input_from_json(path)['keyword']:
            for token in keyword_str.split(','):
                tally[token] = tally.get(token, 0) + 1
        return tally
      
          
    
     
    def sort_by_count(d):
        """Return an OrderedDict of ``d``'s items, highest count first.

        Items with equal counts keep the relative order they had in ``d``.
        """
        ranked = sorted(d.items(), key=lambda pair: pair[1], reverse=True)
        return collections.OrderedDict(ranked)
    
     
    if __name__ == '__main__':
        # Rank the keywords from keyword_2016.json and export the 25 most
        # frequent ones as comma-separated {"name": ..., "value": ...} objects.
        file_name = "keyword_2016.json"
        top_n = 25

        dword = sort_by_count(count_word(file_name))

        # list(...) keeps the slice valid on both Python 2 and Python 3.
        top_items = list(dword.items())[:top_n]
        entries = []
        for key, value in top_items:
            # Keywords are stored with underscores; show them with spaces.
            data = {
                'name': re.sub("_", " ", key),
                'value': value
            }
            entries.append(json.dumps(data))

        # NOTE: the output is a bare comma-separated list of objects (no
        # enclosing brackets), as in the original. Joining also avoids the
        # trailing comma the original wrote when fewer than 25 keywords exist.
        # The with-statement closes the file, which the original never did.
        with open('sort_keyword_2016.json', 'w') as fobj2:
            fobj2.write(','.join(entries))
            
    

      

    2.获取发表论文量排名前十的国家

    1)把所有第一作者的信息保存到authorinfor.json中

    import pymysql
    import json
    
    # Fetch the author-information string and pmc_id of every record whose
    # authorinfor column is non-empty.
    conn = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='',
        db='python',
    )
    try:
        cursor = conn.cursor()

        sql = "SELECT authorinfor,pmc_id FROM alzheimer WHERE authorinfor != ''"
        # `a` is the value returned by execute(); it is used as the row count
        # here and is stored as 'count' in the exported JSON further down.
        a = cursor.execute(sql)
        print(a)
        # b holds the fetched (authorinfor, pmc_id) rows.
        b = cursor.fetchmany(a)
    finally:
        # Release the connection even when the query fails; the original
        # script never closed it.
        conn.close()

    # Author-info strings in row order, plus a row-index -> pmc_id lookup.
    authorinfor_list = [row[0] for row in b]
    pmc_id_dict = {index: row[1] for index, row in enumerate(b)}
    
    def output_to_json(data, filename):
        """Write ``data`` to ``filename`` as JSON and return the JSON text.

        Args:
            data: Any object accepted by ``json.dumps``.
            filename: Path of the file to create or overwrite.

        Returns:
            The JSON string that was written to the file.
        """
        # Serialize once; the same text is written and returned.
        json_text = json.dumps(data)
        # The file is closed by the with-statement, so no explicit close().
        with open(filename, 'w') as handle:
            handle.write(json_text)
        return json_text
    
    # Assemble the export payload field by field, then write it to disk.
    # NOTE(review): the SELECT that produced these rows has no pub_year
    # filter, yet the payload is labelled "2016" -- confirm this is intended.
    output_data = {}
    output_data['pub_year'] = "2016"
    output_data['count'] = a                       # value returned by cursor.execute()
    output_data['authorinfor'] = authorinfor_list  # one author-info string per row
    output_data['pmc_id'] = pmc_id_dict            # row index -> pmc_id
    output_to_json(output_data, 'authorinfor.json')

    2)选出排名前十的国家

    import re  
    import collections  
    import json
    
    def input_from_json(filename):
        """Read the file at ``filename`` and return its parsed JSON content.

        Args:
            filename: Path of a JSON file.

        Returns:
            The decoded Python object (dict, list, ...).
        """
        # No explicit close(): the with-statement handles it. The handle is
        # also renamed so it no longer shadows the Python 2 builtin ``file``.
        with open(filename, 'r') as source:
            return json.load(source)
    
    def count_word(path):
        """Count records per country from the JSON file at ``path``.

        The file is loaded with ``input_from_json``. For each string in its
        'authorinfor' list, the text after the last comma is taken as the
        country; periods, newlines and surrounding whitespace are removed
        before counting.

        Returns:
            A dict mapping each UTF-8 encoded country string to its count.
        """
        result = {}
        authorinfor_list = input_from_json(path)['authorinfor']
        for all_the_text in authorinfor_list:
            # The country is taken to be whatever follows the last comma.
            country = all_the_text.split(',')[-1]
            # Remove literal periods and newlines. The original used
            # re.sub(".", ...), where the unescaped dot matches every
            # character, so every record collapsed to the empty string.
            country = country.replace(".", "").replace("\n", "")
            # Strip the space left after the comma so " USA" and "USA"
            # are counted as the same country.
            country = country.strip()
            # Kept from the original Python 2 script: keys are stored as
            # UTF-8 byte strings rather than unicode objects.
            country = country.encode('utf-8')

            result[country] = result.get(country, 0) + 1
        return result
          
     
    def sort_by_count(d):
        """Build an OrderedDict from ``d`` with entries in descending count order.

        Entries that share a count stay in the order ``d`` yielded them.
        """
        pairs = list(d.items())
        pairs.sort(key=lambda kv: -kv[1])
        ordered = collections.OrderedDict()
        for name, count in pairs:
            ordered[name] = count
        return ordered
    
     
    if __name__ == '__main__':
        # Rank countries by record count, export the ranking to
        # sort_country.json and print the leading entries.
        file_name = "authorinfor.json"

        dword = sort_by_count(count_word(file_name))

        # The original loop wrote entries while its 1-based counter was < 50,
        # i.e. the first 49 countries; that limit is kept.
        ranked = list(dword.items())[:49]
        lines = []
        for country, value in ranked:
            data = {
                'name': country,
                'value': value
            }
            # One JSON object per line. The original string literal was split
            # across two source lines (a syntax error); '\n' is the most
            # plausible intended separator.
            lines.append(json.dumps(data) + '\n')

        # The with-statement closes the file, which the original never did.
        with open('sort_country.json', 'w') as fobj2:
            fobj2.writelines(lines)

        # list(...) keeps the slices valid on both Python 2 and Python 3.
        countrylist = list(dword.keys())
        valuelist = list(dword.values())

        print(countrylist[:11])
        print(valuelist[:11])
  • 相关阅读:
    ubuntu分屏终端
    Xcode-5.1.1更改文件盯作者
    Swift——(两)Swift访问元组
    Android Loader使用,屏幕解锁,重复荷载
    医疗信息季节:第二十三届中国国际医疗仪器设备展览会暨研讨会 思考
    UIBarButtonItem 小记边
    轻松学习Linux教程5 知识与学习bash
    Android 教你打造炫酷的ViewPagerIndicator 不仅仅是高仿MIUI
    Android Context 上下文 你必须知道的一切
    Android 自定义控件 优雅实现元素间的分割线 (支持3.0以下)
  • 原文地址:https://www.cnblogs.com/lovely7/p/6178829.html
Copyright © 2011-2022 走看看