1 # -*- coding: utf-8 -*-
2 # __author__ = 'JieYap'
3 from biocluster.agent import Agent
4 from biocluster.tool import Tool
5 import os
6 import types
7 import subprocess
8 from biocluster.core.exceptions import OptionError
9
10
class OtunetworkAgent(Agent):
    """
    Agent side of the OTU network analysis (the tool side runs
    calc_otu_network.py).

    Options registered in ``__init__``:
        otutable  input OTU table, or a tax-summary directory
        level     classification level used to pick a table from a
                  tax-summary directory (default "otu")
        envtable  optional grouping / environmental-factor table
        envlabs   optional comma separated list of factors to keep

    NOTE(review): the header line of the table is split on a single space in
    this copy of the file; OTU tables are usually tab separated, so confirm
    the delimiter against the upstream source before relying on it.

    version 1.0
    author: JieYao
    last_modified: 2016.8.1
    """

    def __init__(self, parent):
        super(OtunetworkAgent, self).__init__(parent)
        options = [
            {"name": "otutable", "type": "infile", "format": "meta.otu.otu_table, meta.otu.tax_summary_dir"},
            {"name": "level", "type": "string", "default": "otu"},
            {"name": "envtable", "type": "infile", "format": "meta.otu.group_table"},
            {"name": "envlabs", "type": "string", "default": ""}
        ]
        self.add_option(options)
        self.step.add_steps('OtunetworkAnalysis')
        self.on('start', self.step_start)
        self.on('end', self.step_end)

    def step_start(self):
        """Mark the 'OtunetworkAnalysis' step as started and push the update."""
        self.step.OtunetworkAnalysis.start()
        self.step.update()

    def step_end(self):
        """Mark the 'OtunetworkAnalysis' step as finished and push the update."""
        self.step.OtunetworkAnalysis.finish()
        self.step.update()

    def gettable(self):
        """
        Resolve the path of the OTU table to analyse.

        :return: the table selected by the ``level`` option when ``otutable``
                 is a tax-summary directory, otherwise the path of the
                 ``otutable`` file itself
        """
        otu_option = self.option('otutable')
        if otu_option.format == "meta.otu.tax_summary_dir":
            return otu_option.get_table(self.option('level'))
        return otu_option.prop['path']

    def check_options(self):
        """
        Validate the options before the tool is scheduled.

        :return: True when every check passes
        :raises OptionError: when the OTU table is missing or has fewer than
            two samples, when ``envlabs`` names an unknown factor, when the
            environment table has fewer than two samples or samples unknown
            to the OTU table, or when the table has fewer than four lines
        """
        otu_option = self.option('otutable')
        if not otu_option.is_set:
            raise OptionError('必须提供otu表')
        otu_option.get_info()
        if otu_option.prop['sample_num'] < 2:
            raise OptionError('otu表的样本数目少于2,不可进行网络分析')

        env_option = self.option('envtable')
        env_is_set = env_option.is_set
        if env_is_set:
            env_option.get_info()
            if self.option('envlabs'):
                for lab in self.option('envlabs').split(','):
                    if lab not in env_option.prop['group_scheme']:
                        raise OptionError('envlabs中有不在物种(环境因子)表中存在的因子:%s' % lab)
            if len(env_option.prop['sample']) < 2:
                raise OptionError('物种(环境因子)表的样本数目少于2,不可进行网络分析')

        # Read the table once: the header gives the sample names, and at most
        # four lines need to be counted for the minimum-size check below.
        # The context manager closes the handle even if a check raises later.
        with open(self.gettable()) as handle:
            header = handle.readline()
            line_total = 1 if header else 0
            if header:
                for _ in handle:
                    line_total += 1
                    if line_total >= 4:
                        break
        # NOTE(review): single-space delimiter as found in this copy - confirm.
        samplelist = header.strip().split(' ')[1:]

        if env_is_set:
            env_samples = env_option.prop['sample']
            if len(env_samples) > len(samplelist):
                raise OptionError('OTU表中的样本数量:%s少于物种(环境因子)表中的样本数量:%s' % (len(samplelist),
                                                                     len(env_samples)))
            known_samples = set(samplelist)
            for sample in env_samples:
                if sample not in known_samples:
                    raise OptionError('物种(环境因子)表的样本中存在OTU表中未知的样本%s' % sample)

        # Header plus at least three data lines are required.
        if line_total < 4:
            raise OptionError('数据表信息少于3行')
        return True

    def set_resource(self):
        """
        Declare the resources requested for the tool.
        """
        self._cpu = 2
        # NOTE(review): the memory request is an empty string in the original;
        # it is kept unchanged here - confirm whether a value was intended.
        self._memory = ''

    def end(self):
        """Register the result files for upload and finish the agent."""
        result_dir = self.add_upload_dir(self.output_dir)
        result_dir.add_relpath_rules([
            [".", "", "OTU网络分析结果输出目录"],
            ["./real_node_table.txt", "txt", "OTU网络节点属性表"],
            ["./real_edge_table.txt", "txt", "OTU网络边集属性表"],
            ["./real_dc_otu_degree.txt", "txt", "OTU网络OTU节点度分布表"],
            ["./real_dc_sample_degree.txt", "txt", "OTU网络sample节点度分布表"],
            ["./real_dc_sample_otu_degree.txt", "txt", "OTU网络节点度分布表"],
            ["./network_centrality.txt", "txt", "OTU网络中心系数表"],
            ["./network_attributes.txt", "txt", "OTU网络单值属性表"],
        ])
        # Parenthesised form prints the same value under the Python 2 print
        # statement and also parses as a call on Python 3.
        print(self.get_upload_files())
        super(OtunetworkAgent, self).end()
106
107
class OtunetworkTool(Tool):
    """
    Tool side of the OTU network analysis: prepares the input tables, runs
    calc_otu_network.py and links the expected result files into the output
    directory.

    NOTE(review): table rows are split and joined on a single space in this
    copy of the file; OTU tables are usually tab separated, so confirm the
    delimiter against the upstream source before relying on it.
    """

    def __init__(self, config):
        super(OtunetworkTool, self).__init__(config)
        self._version = "1.0.1"
        self.cmd_path = self.config.SOFTWARE_DIR + '/bioinfo/meta/scripts/calc_otu_network.py'
        self.env_table = self.get_new_env()
        self.otu_table = self.get_otu_table()
        # Result files expected below <work_dir>/otu_network, in link order.
        self.out_files = ['real_node_table.txt', 'real_edge_table.txt', 'real_dc_otu_degree.txt',
                          'real_dc_sample_degree.txt', 'real_dc_sample_otu_degree.txt',
                          'network_centrality.txt', 'network_attributes.txt']

    def get_otu_table(self):
        """
        Resolve the OTU table for the requested level and, when an
        environment table is set, restrict it to that table's samples.

        :return: path of the OTU table to feed to the script
        """
        otu_option = self.option('otutable')
        if otu_option.format == "meta.otu.tax_summary_dir":
            otu_path = otu_option.get_table(self.option('level'))
        else:
            otu_path = otu_option.prop['path']
        env_option = self.option('envtable')
        if env_option.is_set:
            return self.filter_otu_sample(otu_path, env_option.prop['sample'],
                                          os.path.join(self.work_dir, 'temp_filter.otutable'))
        return otu_path

    def filter_otu_sample(self, otu_path, filter_samples, newfile):
        """
        Write a copy of the OTU table that keeps only ``filter_samples``.

        :param otu_path: path of the source OTU table
        :param filter_samples: list of sample names to keep, in output order
        :param newfile: path of the filtered table to write
        :return: ``otu_path`` when the table already has exactly as many
                 samples as requested, otherwise ``newfile``
        :raises Exception: when ``filter_samples`` is not a list, when it
            names a sample missing from the table, or when a file cannot be
            opened
        """
        if not isinstance(filter_samples, list):
            raise Exception('过滤otu表样本的样本名称应为列表')
        try:
            with open(otu_path, 'rb') as source:
                header = source.readline()
                all_samples = header.rstrip().split(' ')[1:]
                if not set(filter_samples).issubset(all_samples):
                    raise Exception('提供的过滤样本集合中存在otu表中不存在的样本all:%s,filter_samples:%s' % (all_samples, filter_samples))
                if len(all_samples) == len(filter_samples):
                    return otu_path
                # +1 skips the leading OTU-name column of every row.
                samples_index = [all_samples.index(name) + 1 for name in filter_samples]
                # The output is opened only after validation so that no empty
                # file is left behind on the early-return or error paths.
                with open(newfile, 'wb') as target:
                    target.write('OTU ' + ' '.join(filter_samples) + '\n')
                    for line in source:
                        stripped = line.rstrip()
                        if not stripped:
                            # Blank rows carry no values; skip them instead
                            # of failing with an IndexError.
                            continue
                        all_values = stripped.split(' ')
                        new_values = [all_values[0]] + [all_values[i] for i in samples_index]
                        target.write(' '.join(new_values) + '\n')
            return newfile
        except IOError:
            raise Exception('无法打开OTU相关文件或者文件不存在')

    def get_new_env(self):
        """
        Build the environment table passed to the script.

        :return: path of a sub-table restricted to ``envlabs`` when that
                 option is given, the path of ``envtable`` otherwise, or
                 None when no environment table is set
        """
        env_option = self.option('envtable')
        if not env_option.is_set:
            # The value is only consumed when envtable is set, so there is
            # nothing to prepare here.
            return None
        if self.option('envlabs'):
            new_path = self.work_dir + '/temp_env_table.xls'
            env_option.sub_group(new_path, self.option('envlabs').split(','))
            return new_path
        return env_option.path

    def run(self):
        """
        Run the tool.
        """
        super(OtunetworkTool, self).run()
        self.run_otu_network_py()

    def formattable(self, tablepath):
        """
        Strip leading '#' characters from the header line of a table.

        :param tablepath: path of the table to normalise
        :return: ``tablepath`` when the first line does not start with '#'
                 (including an empty file), otherwise the path of the
                 rewritten copy ``<work_dir>/temp_format.table``
        """
        with open(tablepath) as source:
            first_line = source.readline()
            if not first_line.startswith('#'):
                return tablepath
            formatted_path = os.path.join(self.work_dir, 'temp_format.table')
            with open(formatted_path, 'w') as target:
                target.write(first_line.lstrip('#'))
                # Stream the remaining rows instead of loading the whole file.
                for line in source:
                    target.write(line)
        return formatted_path

    def run_otu_network_py(self):
        """
        Run calc_otu_network.py and link its result files to the output dir.
        """
        real_otu_path = self.formattable(self.otu_table)
        # An argument list (no shell) keeps paths containing whitespace or
        # shell metacharacters intact.
        cmd = [
            self.config.SOFTWARE_DIR + '/program/Python/bin/python',
            self.cmd_path,
            '-i', real_otu_path,
            '-o', self.work_dir + '/otu_network',
        ]
        if self.option('envtable').is_set:
            cmd += ['-m', self.env_table]
        self.logger.info('开始运行calc_otu_network生成OTU网络并进行计算')

        try:
            subprocess.check_output(cmd)
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing interpreter, which the shell form
            # used to surface as a non-zero exit status.
            self.logger.info('OTU_Network计算失败')
            self.set_error('运行calc_otu_network.py失败')
            return
        self.logger.info('OTU_Network计算完成')

        allfiles = self.get_filesname()
        if not all(allfiles):
            # get_filesname has already reported the missing results.
            return
        for found, newname in zip(allfiles, self.out_files):
            self.linkfile(found, newname)
        self.end()

    def linkfile(self, oldfile, newname):
        """
        Hard-link a file into the output directory, replacing any existing
        file of the same name.

        :param oldfile: path of the file to link
        :param newname: file name to use inside the output directory
        :return:
        """
        newpath = os.path.join(self.output_dir, newname)
        if os.path.exists(newpath):
            os.remove(newpath)
        os.link(oldfile, newpath)

    def get_filesname(self):
        """
        Locate the expected result files below ``<work_dir>/otu_network``.

        :return: list parallel to ``self.out_files`` holding the path found
                 for each expected file, or None where none was found; an
                 error is reported through ``set_error`` when any is missing
        """
        files_status = [None] * len(self.out_files)
        for paths, _dirs, filelist in os.walk(self.work_dir + '/otu_network'):
            for filename in filelist:
                name = os.path.join(paths, filename)
                for i, expected in enumerate(self.out_files):
                    if expected in name:
                        files_status[i] = name
        if not all(files_status):
            self.set_error('未知原因,结果文件生成出错或丢失')
        return files_status
228 self.set_error('未知原因,结果文件生成出错或丢失')
229 return files_status