zoukankan      html  css  js  c++  java
  • OTU_Network&calc_otu

      1 # -*- coding: utf-8 -*-
      2 # __author__ = 'JieYap'
      3 from biocluster.agent import Agent
      4 from biocluster.tool import Tool
      5 import os
      6 import types
      7 import subprocess
      8 from biocluster.core.exceptions import OptionError
      9 
     10 
     11 class OtunetworkAgent(Agent):
     12     """
     13     需要calc_otu_network.py
     14     version 1.0
     15     author: JieYao
     16     last_modified:2016.8.1
     17     """
     18     
     19     def __init__(self, parent):
     20         super(OtunetworkAgent, self).__init__(parent)
     21         options = [
     22             {"name": "otutable", "type": "infile", "format": "meta.otu.otu_table, meta.otu.tax_summary_dir"},
     23             {"name": "level", "type": "string", "default": "otu"},
     24             {"name": "envtable", "type": "infile", "format": "meta.otu.group_table"},
     25             {"name": "envlabs", "type": "string", "default": ""}
     26             ]
     27         self.add_option(options)
     28         self.step.add_steps('OtunetworkAnalysis')
     29         self.on('start', self.step_start)
     30         self.on('end', self.step_end)
     31 
     32     def step_start(self):
     33         self.step.OtunetworkAnalysis.start()
     34         self.step.update()
     35         
     36     def step_end(self):
     37         self.step.OtunetworkAnalysis.finish()
     38         self.step.update()
     39         
     40     def gettable(self):
     41         """
     42         根据输入的otu表和分类水平计算新的otu表
     43         :return:
     44         """
     45         if self.option('otutable').format == "meta.otu.tax_summary_dir":
     46             return self.option('otutable').get_table(self.option('level'))
     47         else:
     48             return self.option('otutable').prop['path']
     49         
     50     def check_options(self):
     51         """
     52         重写参数检查
     53         """
     54         if not self.option('otutable').is_set:
     55             raise OptionError('必须提供otu表')
     56         self.option('otutable').get_info()
     57         if self.option('otutable').prop['sample_num'] < 2:
     58             raise OptionError('otu表的样本数目少于2,不可进行网络分析')
     59         if self.option('envtable').is_set:
     60             self.option('envtable').get_info()
     61             if self.option('envlabs'):
     62                 labs = self.option('envlabs').split(',')
     63                 for lab in labs:
     64                     if lab not in self.option('envtable').prop['group_scheme']:
     65                         raise OptionError('envlabs中有不在物种(环境因子)表中存在的因子:%s' % lab)
     66             else:
     67                 pass
     68             if len(self.option('envtable').prop['sample']) < 2:
     69                 raise OptionError('物种(环境因子)表的样本数目少于2,不可进行网络分析')
     70         samplelist = open(self.gettable()).readline().strip().split('	')[1:]
     71         if self.option('envtable').is_set:
     72             self.option('envtable').get_info()
     73             if len(self.option('envtable').prop['sample']) > len(samplelist):
     74                 raise OptionError('OTU表中的样本数量:%s少于物种(环境因子)表中的样本数量:%s' % (len(samplelist),
     75                                   len(self.option('envtable').prop['sample'])))
     76             for sample in self.option('envtable').prop['sample']:
     77                 if sample not in samplelist:
     78                     raise OptionError('物种(环境因子)表的样本中存在OTU表中未知的样本%s' % sample)
     79         table = open(self.gettable())
     80         if len(table.readlines()) < 4 :
     81             raise OptionError('数据表信息少于3行')
     82         table.close()
     83         return True
     84 
     85     def set_resource(self):
     86         """
     87         设置所需资源
     88         """
     89         self._cpu = 2
     90         self._memory = ''
     91         
     92     def end(self):
     93         result_dir = self.add_upload_dir(self.output_dir)
     94         result_dir.add_relpath_rules([
     95                 [".", "", "OTU网络分析结果输出目录"],
     96                 ["./real_node_table.txt", "txt", "OTU网络节点属性表"],
     97                 ["./real_edge_table.txt", "txt", "OTU网络边集属性表"],
     98                 ["./real_dc_otu_degree.txt", "txt", "OTU网络OTU节点度分布表"],
     99                 ["./real_dc_sample_degree.txt", "txt", "OTU网络sample节点度分布表"],
    100                 ["./real_dc_sample_otu_degree.txt", "txt", "OTU网络节点度分布表"],
    101                 ["./network_centrality.txt", "txt", "OTU网络中心系数表"],
    102                 ["./network_attributes.txt", "txt", "OTU网络单值属性表"],
    103                 ])
    104         print self.get_upload_files()
    105         super(OtunetworkAgent, self).end()
    106 
    107 
    108 class OtunetworkTool(Tool):
    109     def __init__(self, config):
    110         super(OtunetworkTool, self).__init__(config)
    111         self._version = "1.0.1"
    112         self.cmd_path = self.config.SOFTWARE_DIR + '/bioinfo/meta/scripts/calc_otu_network.py'
    113         self.env_table = self.get_new_env()
    114         self.otu_table = self.get_otu_table()
    115         self.out_files = ['real_node_table.txt', 'real_edge_table.txt', 'real_dc_otu_degree.txt', 'real_dc_sample_degree.txt', 'real_dc_sample_otu_degree.txt', 'network_centrality.txt', 'network_attributes.txt']
    116         
    117         
    118     def get_otu_table(self):
    119         """
    120         根据调用的level参数重构otu表
    121         :return:
    122         """
    123         if self.option('otutable').format == "meta.otu.tax_summary_dir":
    124             otu_path = self.option('otutable').get_table(self.option('level'))
    125         else:
    126             otu_path = self.option('otutable').prop['path']
    127         if self.option('envtable').is_set:
    128             return self.filter_otu_sample(otu_path, self.option('envtable').prop['sample'],
    129                                           os.path.join(self.work_dir, 'temp_filter.otutable'))
    130         else:
    131             return otu_path
    132     
    133     def filter_otu_sample(self, otu_path, filter_samples, newfile):
    134         if not isinstance(filter_samples, types.ListType):
    135             raise Exception('过滤otu表样本的样本名称应为列表')
    136         try:
    137             with open(otu_path, 'rb') as f, open(newfile, 'wb') as w:
    138                 one_line = f.readline()
    139                 all_samples = one_line.rstrip().split('	')[1:]
    140                 if not ((set(all_samples) & set(filter_samples)) == set(filter_samples)):
    141                     raise Exception('提供的过滤样本集合中存在otu表中不存在的样本all:%s,filter_samples:%s' % (all_samples, filter_samples))
    142                 if len(all_samples) == len(filter_samples):
    143                     return otu_path
    144                 samples_index = [all_samples.index(i) + 1 for i in filter_samples]
    145                 w.write('OTU	' + '	'.join(filter_samples) + '
    ')
    146                 for line in f:
    147                     all_values = line.rstrip().split('	')
    148                     new_values = [all_values[0]] + [all_values[i] for i in samples_index]
    149                     w.write('	'.join(new_values) + '
    ')
    150                 return newfile
    151         except IOError:
    152             raise Exception('无法打开OTU相关文件或者文件不存在')
    153 
    154     def get_new_env(self):
    155         """
    156         根据envlabs生成新的envtable
    157         """
    158         if self.option('envlabs'):
    159             new_path = self.work_dir + '/temp_env_table.xls'
    160             self.option('envtable').sub_group(new_path, self.option('envlabs').split(','))
    161             return new_path
    162         else:
    163             return self.option('envtable').path
    164 
    165     def run(self):
    166         """
    167         运行
    168         """
    169         super(OtunetworkTool, self).run()
    170         self.run_otu_network_py()
    171 
    172     def formattable(self, tablepath):
    173         alllines = open(tablepath).readlines()
    174         if alllines[0][0] == '#':
    175             newtable = open(os.path.join(self.work_dir, 'temp_format.table'), 'w')
    176             newtable.write(alllines[0].lstrip('#'))
    177             newtable.writelines(alllines[1:])
    178             newtable.close()
    179             return os.path.join(self.work_dir, 'temp_format.table')
    180         else:
    181             return tablepath
    182 
    183     def run_otu_network_py(self):
    184         """
    185         运行calc_otu_network.py
    186         """
    187         real_otu_path = self.formattable(self.otu_table)
    188         cmd = self.config.SOFTWARE_DIR + '/program/Python/bin/python '
    189         cmd += self.cmd_path
    190         cmd += ' -i %s -o %s' % (real_otu_path, self.work_dir + '/otu_network')
    191         if self.option('envtable').is_set:
    192             cmd += ' -m %s' % (self.env_table)
    193         self.logger.info('开始运行calc_otu_network生成OTU网络并进行计算')
    194         
    195         try:
    196             subprocess.check_output(cmd, shell=True)
    197             self.logger.info('OTU_Network计算完成')
    198         except subprocess.CalledProcessError:
    199             self.logger.info('OTU_Network计算失败')
    200             self.set_error('运行calc_otu_network.py失败')
    201         allfiles = self.get_filesname()
    202         for i in range(len(self.out_files)):
    203             self.linkfile(allfiles[i], self.out_files[i])
    204         self.end()
    205 
    206     def linkfile(self, oldfile, newname):
    207         """
    208         link文件到output文件夹
    209         :param oldfile: 资源文件路径
    210         :param newname: 新的文件名
    211         :return:
    212         """
    213         newpath = os.path.join(self.output_dir, newname)
    214         if os.path.exists(newpath):
    215             os.remove(newpath)
    216         os.link(oldfile, newpath)
    217 
    218     def get_filesname(self):
    219         files_status = [None, None, None, None, None, None, None]
    220         for paths,d,filelist in os.walk(self.work_dir + '/otu_network'):
    221             for filename in filelist:
    222                 name = os.path.join(paths, filename)
    223                 for i in range(len(self.out_files)):
    224                     if self.out_files[i] in name:
    225                         files_status[i] = name
    226         for i in range(len(self.out_files)):
    227             if not files_status[i]:
    228                 self.set_error('未知原因,结果文件生成出错或丢失')
    229         return files_status
    View Code
      1 # -*- coding: utf-8 -*-
      2 # __author__ = 'JieYao'
      3 import os
      4 import argparse
      5 from biocluster.config import Config
      6 import shutil
      7 import networkx
      8 
      9 def make_env_table(inFile, outFile):
     10     with open(inFile, "r") as tmp_file:
     11         samples_name = tmp_file.readline().rstrip().split('	')
     12     with open('group.txt' , "w") as tmp_file:
     13         tmp_file.write("#sample	group
    ")
     14         for i in range(1,len(samples_name)):
     15             tmp_file.write(samples_name[i]+"	STD
    ") 
     16     return './group.txt'
     17 
     18 parser = argparse.ArgumentParser(description='输入OTU表格,生成OTU网络信息')
     19 parser.add_argument('-i', "--otu_matrix", help="输入的OTU表", required = True)
     20 parser.add_argument('-o', "--output", help="输出文件位置", required = True)
     21 parser.add_argument('-m', "--env_table", help="样本分类表", required = False)
     22 args = vars(parser.parse_args())
     23 
     24 flag = False
     25 inFile = args["otu_matrix"]
     26 outFile = args["output"]
     27 if not args["env_table"]:
     28     env_table = make_env_table(inFile, outFile)
     29     flag = True
     30 else:
     31     env_table = args["env_table"]
     32 if os.path.exists(outFile):
     33     shutil.rmtree(outFile)
     34     
     35 """
     36 执行make_otu_network.py 计算otu网络的相关信息并生成文件
     37 完成后由于make_otu_network.py生成的是一个文件夹,使用os和shutil的命令将文件全部移动到输出路径下
     38 """
     39 command = Config().SOFTWARE_DIR + '/program/Python/bin/python '
     40 command += Config().SOFTWARE_DIR + '/program/Python/bin/make_otu_network.py'
     41 command += ' -i %s -o %s -m %s' %(inFile, outFile, env_table)
     42 os.system(command)
     43 if flag:
     44     os.remove("./group.txt")
     45 for paths,d,filelist in os.walk(outFile):
     46     for filename in filelist:
     47         name = os.path.join(paths, filename)
     48         if "reduced" in name:
     49             os.remove(name)
     50         elif "/otu_network/" in name:
     51             shutil.move(name, outFile)
     52 shutil.rmtree(outFile + '/otu_network')
     53 for paths,d,filelist in os.walk(outFile):
     54     for filename in filelist:
     55         name = os.path.join(paths, filename)
     56         if "props" in name:
     57             os.remove(name)
     58 
     59 """
     60 根据node表建立{节点名字---节点编号}的字典
     61 """
     62 node_name = [""]
     63 node_dictionary = {}
     64 with open(outFile + '/real_node_table.txt', "r") as node_file:
     65     informations = node_file.readlines()
     66     for i in range(1, len(informations)):
     67         tmp = informations[i].rstrip().split("	")
     68         node_dictionary[tmp[0]] = i
     69         node_name += [tmp[0]]
     70 """
     71 开始使用Networkx包建立图
     72 计算OTU网络的属性信息
     73 """
     74 G = networkx.Graph()
     75 with open(outFile + "/real_edge_table.txt" , "r") as edge_file:
     76     informations = edge_file.readlines()
     77     for i in range(1, len(informations)):
     78         tmp = informations[i].rstrip().split("	")
     79         G.add_edge(node_dictionary[tmp[0]], node_dictionary[tmp[1]], weight = eval(tmp[2]))
     80 
     81 
     82 """
     83 用实践测试单独对Sample或者是OTU构图的构图方法,
     84 结果证明这样的构图出来的结果基本上Sample是完全图,
     85 OTU单独构图的意义则不大,所以这种想法……失败了。
     86 """
     87 """
     88 H = networkx.Graph()
     89 with open(outFile + "/real_node_table.txt" , "r") as node_file:
     90     informations = node_file.readlines()
     91     for i in range(1,len(informations)):
     92         tmp = informations[i].rstrip().split("	")
     93         if tmp[2] == "otu_node":
     94             break
     95 position = i
     96 for i in range(position):
     97     for j in range(position):
     98         H.add_edge(i,j,weight=0)
     99         for k in range(position,len(G)):
    100             if G.get_edge_data(i,k) and G.get_edge_data(j,k):
    101                 H.edge[i][j]['weight'] += min(G.edge[i][k]['weight']+G.edge[j][k]['weight'])
    102         if H.edge[i][j]['weight'] == 0:
    103             H.remove_edge(i,j)
    104 
    105 minx = 32767
    106 for i in range(position):
    107     for j in range(position):
    108         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
    109             minx = min(minx, H.edge[i][j]['weight'])
    110 
    111 for i in range(position):
    112     for j in range(position):
    113         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
    114             H.edge[i][j]['weight'] -= minx
    115             if H.edge[i][j]['weight'] <=0:
    116                 H.remove_edge(i,j)
    117 print H.edges()
    118 
    119 H = networkx.Graph()
    120 with open(outFile + "/real_node_table.txt" , "r") as node_file:
    121     informations = node_file.readlines()
    122     for i in range(1,len(informations)):
    123         tmp = informations[i].rstrip().split("	")
    124         if tmp[2] == "otu_node":
    125             break
    126 position = i
    127 for i in range(position,len(G)):
    128     for j in range(position,len(G)):
    129         H.add_edge(i,j,weight=0)
    130         for k in range(position):
    131             if G.get_edge_data(i,k) and G.get_edge_data(j,k):
    132                 H.edge[i][j]['weight'] += 1
    133         if H.edge[i][j]['weight'] == 0:
    134             H.remove_edge(i,j)
    135 print len(H)
    136 print len(H.edges())
    137 print H.edges()
    138 
    139 minx = 32767
    140 for i in range(position,len(G)):
    141     for j in range(position,len(G)):
    142         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
    143             minx = min(minx, H.edge[i][j]['weight'])
    144 
    145 for i in range(position):
    146     for j in range(position):
    147         if (i in H)and(j in H)and(H.get_edge_data(i,j)):
    148             H.edge[i][j]['weight'] -= minx
    149             if H.edge[i][j]['weight'] <=0:
    150                 H.remove_edge(i,j)
    151 """
    152 
    153 """3计算属性表,分本3"""
    154 
    155 #节点度中心系数,表示节点在图中的重要性
    156 Degree_Centrality = networkx.degree_centrality(G)
    157 #节点距离中心系数,值越大表示到其他节点距离越近,中心性越高
    158 Closeness_Centrality = networkx.closeness_centrality(G)
    159 #节点介数中心系数,值越大表示通过该节点的最短路径越多,中心性越高
    160 Betweenness_Centrality = networkx.betweenness_centrality(G)
    161 with open(os.path.join(args["output"], "network_centrality.txt"), "w") as tmp_file:
    162     tmp_file.write("Node_ID	Node_Name	Degree_Centrality	")
    163     tmp_file.write("Closeness_Centrality	Betweenness_Centrality
    ")
    164     for i in range(1, len(G)+1):
    165         tmp_file.write(str(i)+"	"+node_name[i]+"	")
    166         tmp_file.write(str(Degree_Centrality[i])+"	")
    167         tmp_file.write(str(Closeness_Centrality[i])+"	")
    168         tmp_file.write(str(Betweenness_Centrality[i])+"
    ")
    169 
    170 
    171 #网络传递性,二分图中应该为0,否则有问题
    172 Transitivity = networkx.transitivity(G)
    173 #网络直径
    174 Diameter = networkx.diameter(G)
    175 #网络平均最短路长度
    176 Average_shortest_path = networkx.average_shortest_path_length(G)
    177 with open(os.path.join(args["output"], "network_attributes.txt"), "w") as tmp_file:
    178     tmp_file.write("Transitivity:"+str(Transitivity)+"
    ")
    179     tmp_file.write("Diameter:"+str(Diameter)+"
    ")
    180     tmp_file.write("Average_shortest_path_length:"+str(Average_shortest_path)+"
    ")
    View Code
  • 相关阅读:
    VS2008 环境中完美搭建 Qt 4.7.4 静态编译的调试与发布 Inchroy's Blog 博客频道 CSDN.NET
    编写可丢弃的代码
    c++ using namespace std; 海明威 博客园
    解决MySQL server has gone away
    nginx upstream 调度策略
    (2006, 'MySQL server has gone away') 错误解决 dba007的空间 51CTO技术博客
    Linux IO模型漫谈(2) 轩脉刃 博客园
    redis源码笔记 initServer 刘浩de技术博客 博客园
    MySQLdb批量插入数据
    词库的扩充百度百科的抓取你知道这些热词吗? rabbit9898 ITeye技术网站
  • 原文地址:https://www.cnblogs.com/neverforget/p/5784957.html
Copyright © 2011-2022 走看看