zoukankan      html  css  js  c++  java
  • python goatools使用

    用david搞定了所有的GOterm后,接下来就是利用goslim处理这些term。

    用的包是goatools,需要下载几个obo文件,以及fisher、pygraphviz和graphviz等几个模块。

    # -*- coding: utf-8 -*-
    """
    Created on Fri Nov 21 20:06:42 2014
    
    @author: hluo
    """
    
    #!/usr/bin/python
    # -*- coding: UTF-8 -*-
    
    import os
    import os.path as op
    import sys
    sys.path.insert(0, op.join(op.dirname(__file__), ".."))
    from obo_parser import GODag
    from mapslim import mapslim
    import json
    import re
    #from goatools.mapslim import mapslim
    
    
    # copied from find_enrichment.py
    def get_goslim(term, godag, goslimdag):
        """Map a GO term onto the GO-slim DAG.

        Args:
            term: GO identifier to look up.
            godag: container of the full ontology; only ``term in godag`` is
                required here, the rest is passed through to ``mapslim``.
            goslimdag: container of the slim ontology, passed to ``mapslim``.

        Returns:
            A two-element list ``[direct_anc, all_anc]`` as produced by
            ``mapslim``, or ``[None, None]`` when ``term`` is not in ``godag``.
            NOTE(review): callers treat both elements as sets -- confirm
            against the ``mapslim`` implementation in use.
        """
        if term not in godag:
            # Unknown term: signal "no mapping" instead of calling mapslim.
            return [None, None]
        direct_anc, all_anc = mapslim(term, godag, goslimdag)
        return [direct_anc, all_anc]
            
    
    def mygofun(json_file):
        """Add GO-slim annotations to every record of a JSON file.

        The JSON document is loaded from ``json_file`` and iterated record by
        record. For every key of a record that starts with ``'GO'``, two new
        keys are added to the same record:

        * ``<key>_dslim`` -- union of the direct slim ancestors
        * ``<key>_aslim`` -- union of all slim ancestors

        The annotated records are written to ``'<json_file>.txt'``.

        Args:
            json_file: path of the JSON file to annotate.
                NOTE(review): assumed to hold a list of dicts whose ``GO*``
                values are lists of strings -- confirm against the producer.

        Raises:
            AssertionError: if one of the hard-coded OBO files is missing.
        """
        obo_file = '/home/hluo/Desktop/goslim/go-basic.obo'
        assert os.path.exists(obo_file), "file %s not found!" % obo_file

        slim_obo_file = '/home/hluo/Desktop/goslim/goslim_generic.obo'
        assert os.path.exists(slim_obo_file), "file %s not found!" % slim_obo_file

        # load DAGs
        go_dag = GODag(obo_file)
        goslim_dag = GODag(slim_obo_file)

        with open(json_file) as handle:
            myrecord = json.load(handle)

        # The GO id is the run of non-whitespace characters between a literal
        # '$' and the following '~'.
        # NOTE(review): the published pattern had lost its backslashes
        # ('(?<=$)S+(?=~)', which can never match); the escapes are restored
        # here -- confirm against the real annotation strings.
        re_obj = re.compile(r'(?<=\$)\S+(?=~)')

        for item in myrecord:
            # Snapshot the keys first: new '<key>_dslim' / '<key>_aslim' entries
            # are inserted into `item` below, which must not disturb this loop
            # (a lazy filter over item.keys() fails on Python 3).
            go_keys = [key for key in item if key.startswith('GO')]
            for k in go_keys:
                direct_slim = set()
                all_slim = set()
                for text in item[k]:
                    found = re_obj.search(text)
                    if found is None:
                        # Be explicit about entries that carry no GO id
                        # instead of failing with an IndexError.
                        sys.stderr.write("no GO id found in %r\n" % (text,))
                        continue
                    direct_anc, all_anc = get_goslim(found.group(0), go_dag, goslim_dag)
                    if direct_anc is None or all_anc is None:
                        # Term is absent from the full DAG: nothing to merge.
                        continue
                    direct_slim |= direct_anc
                    all_slim |= all_anc
                item[k + '_dslim'] = list(direct_slim)
                item[k + '_aslim'] = list(all_slim)

        with open('%s.txt' % json_file, 'w') as handle:
            json.dump(myrecord, handle, indent=1)
            
    if __name__ == '__main__':
        # The file name must be a string literal; written bare it is parsed as
        # attribute access on an undefined name and raises NameError.
        mygofun('NC_000913.gbk.json')

    The script loads the JSON file and, for every GO key of each record, adds two new keys (`<key>_dslim` and `<key>_aslim`); the result is written to `<json_file>.txt`.

    P.S. `dslim` = direct slim ancestors; `aslim` = all slim ancestors.

    Then I ran a Python batch script to process all the JSON files.

    # -*- coding: utf-8 -*-
    """
    Created on Mon Nov 24 17:37:24 2014
    
    @author: hluo
    """
    
    import os
    import re
    import sys
    from mygoslim import mygofun

    if __name__ == '__main__':
        # Directory holding the JSON files to annotate.
        mydir = '/home/hluo/Desktop/gbk'

        # Escape the dot so that only names really ending in ".json" match
        # (an unescaped '.' matches any character, e.g. "foo_json").
        re_obj = re.compile(r'\.json$')
        json_file_list = [name for name in os.listdir(mydir) if re_obj.search(name)]

        # Run the GO-slim annotation on each JSON file found.
        for item in json_file_list:
            mygofun(os.path.join(mydir, item))


    The script uses the `re` and `os` modules to collect all the JSON files in the directory.

  • 相关阅读:
    Linux基础命令-pwd
    Linux基础命令-sosreport
    c#设置xml内容不换行;添加属性为xsi:nil="true"的空节点
    sql ROW_NUMBER() 排序函数
    服务器×××上的MSDTC不可用解决办法
    MSDTC Service的访问权限
    setTimeout
    ASP.NET中Request.Form中文乱码的解决方
    js实现小数点后保留N位并可以四舍五入——js对float数据的处理
    Meta标签中的viewport属性含义及设置
  • 原文地址:https://www.cnblogs.com/hluo/p/4121347.html
Copyright © 2011-2022 走看看