前言: cuckoo是一个强大的工具,几乎我们需要的样本信息cuckoo都可以提供,这两天写了一个提取cuckoo检测信息批量导入数据库的小程序,自觉程序写得少,有很多不足,放上源码,有错误或可以优化的地方希望大家不吝赐教~
开发环境:Python 2.7 + scapy + Cuckoo + CentOS
您也可以移步 https://github.com/Viwilla/CukooToMySQL 下载源代码
#----------------------------------------------------
# The database connection is opened in main(); the port must be passed as an
# integer, not as a quoted string.
# Non-ASCII comments can cause source-encoding errors when the script is run
# on Linux, so the comments in this section are written in ASCII.
# The author notes limited programming experience and welcomes corrections
# and optimisation suggestions.
# _author_ = Vi
# https://github.com/Viwilla
# http://www.cnblogs.com/Viwilla/
#----------------------------------------------------
import json
import codecs
import sqlite3
import os
import os.path
import sys
import binascii
import shutil

import MySQLdb
import scapy.all as scapy

# Python 2 only: make implicit str/unicode conversions use UTF-8.
# reload() is needed because site.py removes sys.setdefaultencoding.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')

#-------------------------------------------------------------
# To parse the HTTP protocol, the http package can be imported.
# Importing it failed on Linux, so this section is commented out and the
# whole HTTP content is extracted instead.
# The HTTP parsing code further below has been kept.
#--------------------------------------------------------------
#try:
    # This import works from the project directory
    #import scapy_http.http
#except ImportError:
    # If you installed this package via pip, you just need to execute this
    #from scapy.layers import http

re = 0          # number of samples skipped because their MD5 was already known
add = 0
ID = 0          # ID used for the next inserted row
_MD5 = ''       # MD5 of the report most recently read by ReadJSON()
totalMD5 = []   # MD5 values already stored or already seen in this run
strtotal = []   # keys of the rows already inserted, used for de-duplication
flag = 0        # 1: the current sample has traffic information, 0: it has none
ReFlag = 0      # ReadJSON() only performs its MD5 check while this is 0

#------------------------------------------------
#ConnectDB(host, user ,passwd,dbname,port)
#------------------------------------------------
def ConnectDB(h, u ,pa,d,p):
    """Open the MySQL connection and publish it as the globals conn / cur.

    h, u, pa, d -- host, user name, password and database name.
    p           -- port; coerced with int() so that "3306" is accepted too.

    On failure "use DB failed" is printed and the error is re-raised, so the
    script stops at the real cause instead of failing later on an undefined
    cursor.
    """
    global cur
    global conn
    try:
        conn = MySQLdb.connect(
            host = h,
            user = u,
            passwd = pa,
            db = d,
            port = int(p))
        cur = conn.cursor()
    except (MySQLdb.Error, ValueError, TypeError):
        print("use DB failed")
        raise
    print("use success")

#------------------------------------------------
# Find the max ID in the database; the next insert
# continues after that ID.
#------------------------------------------------
def SelectID():
    """Return MAX(ID) of table samplesinfo3.

    MAX() over an empty table is NULL, which the driver reports as None.
    """
    query = "SELECT MAX(ID) FROM samplesinfo3"
    cur.execute(query)
    rows = cur.fetchall()
    return rows[0][0]

#------------------------------------------------
# Collect the MD5 values already stored in the database;
# samples whose MD5 is already present are skipped later.
#------------------------------------------------
def CountMD5():
    """Append every SampleMD5 stored in samplesinfo3 to totalMD5, once each."""
    query = "SELECT SampleMD5 FROM samplesinfo3"
    cur.execute(query)
    known = set(totalMD5)   # set lookup keeps the de-duplication linear
    for row in cur.fetchall():
        md5 = row[0]
        if md5 not in known:
            known.add(md5)
            totalMD5.append(md5)

#------------------------------------------------
# Helpers for ReadJSON
#------------------------------------------------
# VirusTotal engines whose verdicts are stored, in column order.
_AV_ENGINES = ('Qihoo-360', 'Avira', 'ClamAV', 'ESET-NOD32',
               'F-Secure', 'Kaspersky', 'Symantec')

def _sql_text(value):
    """Render value as text that can sit inside a quoted SQL string literal.

    Falsy values become ''. Backslashes and both quote characters are
    backslash-escaped.
    NOTE(review): this assumes the server's default sql_mode (backslash
    escapes enabled). Parameterised queries would be the proper fix, but the
    callers exchange ready-made SQL text.
    """
    if not value:
        return ''
    text = "%s" % (value,)
    return text.replace("\\", "\\\\").replace("'", "\\'").replace('"', '\\"')

def _av_result(report, engine):
    """Return the VirusTotal result string of engine, or '' when unavailable."""
    try:
        result = report['virustotal']['scans'][engine]['result']
    except (KeyError, TypeError):
        return ''
    return result or ''

#------------------------------------------------
# Parse the "report.json" file generated by cuckoo
# and extract the fields we need.
#------------------------------------------------
def ReadJSON(file):
    """Turn one cuckoo report into the leading part of an INSERT value list.

    file -- path of the report.json to read.

    Returns 0 when ReFlag is 0 and the sample's MD5 is already in totalMD5.
    Otherwise returns the text " 'sha1','md5',"type",'yara',<7 AV results>,"
    (leading space, trailing comma) to which the caller appends the rest.

    Side effects: stores the sample's MD5 in the global _MD5 (also when 0 is
    returned) and records a new MD5 in totalMD5.
    """
    global _MD5
    with open(file) as data_file:
        data = json.load(data_file)

    target = data['target']['file']
    _SHA1 = target['sha1']
    _MD5 = target['md5']
    if ReFlag == 0:
        if _MD5 in totalMD5:
            return 0
        totalMD5.append(_MD5)

    values = [_SHA1, _MD5, target.get('type'), target.get('yara')]
    values.extend(_av_result(data, engine) for engine in _AV_ENGINES)

    str1 = " '%s','%s',\"%s\",'%s','%s','%s','%s','%s','%s','%s','%s'," % tuple(
        _sql_text(value) for value in values)
    return str1

#----------------------------------------------
# Parse the pcap file generated by cuckoo
# and extract the information we need.
#----------------------------------------------
def _hex_payload(raw):
    """Return raw as a hexadecimal text string."""
    hexed = binascii.b2a_hex(raw)
    if not isinstance(hexed, str):
        # Python 3 returns bytes here
        hexed = hexed.decode('ascii')
    return hexed

def ReadPcap(file,str0,str1, local_ip='192.168.229.111'):
    """Insert one row per distinct TCP/UDP packet of a capture.

    file     -- path of the pcap file; a missing file is treated as a capture
                without packets.
    str0     -- the "INSERT INTO ... values" prefix.
    str1     -- the value fragment produced for the sample (ends with ",").
    local_ip -- address treated as the local endpoint (presumably the
                analysis machine -- confirm). Packets sent from it are
                recorded as "dst_<ip>:<port>", packets sent to it as
                "src_<ip>:<port>".

    Each row is completed with the DNS query name, the remote endpoint and
    the hex-encoded payload, and passed to ToDB(). Rows whose
    MD5 + DNS + endpoint + payload key is already in strtotal are skipped.
    The global flag is set to 1 once a row has been inserted; if it is still
    0 after the loop, a single row with empty traffic columns is inserted.
    """
    global flag

    if os.path.isfile(file):
        packets = scapy.rdpcap(file)
    else:
        packets = []

    for p in packets:
        # Show what scapy parsed (debugging aid)
        #print '=' * 78
        #p.show()
        ip = p.payload
        if ip.name == 'ARP':
            continue
        if ip.name != 'IP':
            print("No rule for %s"%ip.name)
            continue
        # Check the protocol before touching ports: ICMP and friends have none.
        if ip.proto not in (6, 17):
            print("No rule for protocol %s"%ip.proto)
            continue

        transport = ip.payload
        app = transport.payload
        _IP = ''
        _dns = ''
        _flow = ''

        # Remember the remote endpoint. The port lookup is guarded because
        # the layer after IP may lack ports (e.g. non-first fragments).
        if ip.src == local_ip:
            port = getattr(transport, 'dport', None)
            if port is not None:
                _IP = "dst_%s:%d"%(ip.dst, port)
        elif ip.dst == local_ip:
            port = getattr(transport, 'sport', None)
            if port is not None:
                _IP = "src_%s:%d"%(ip.src, port)

        if ip.proto == 6:
            # TCP: keep the payload as hexadecimal text
            if app.name == 'Raw':
                _flow = _hex_payload(p.load)
            if app.name == 'HTTP' and app.payload.name == 'HTTP Response':
                # To keep the ASCII data instead, p.load can be used directly
                _flow = _hex_payload(p.load)
        else:
            # UDP
            if app.name == 'Raw':
                _flow = _hex_payload(p.load)
            if app.name == 'DNS':
                # qd is None for DNS packets without a question section
                qname = getattr(app.qd, 'qname', None)
                if qname:
                    if not isinstance(qname, str):
                        qname = qname.decode('utf-8', 'replace')
                    _dns = qname

        #-------- skip rows that were already inserted ---------
        strc = _MD5 + _dns + _IP + _flow
        if strc not in strtotal:
            strtotal.append(strc)
            # The name comes from captured traffic, so escape it before it is
            # pasted into the SQL text (assumes default backslash escapes).
            safe_dns = _dns.replace("\\", "\\\\").replace("'", "\\'")
            strID = "('%d',"%ID
            str2 = "'%s','%s','%s');"%(safe_dns, _IP, _flow)
            ToDB(str0 + strID + str1 + str2)
            flag = 1

    # flag = 1: the sample has traffic information
    # flag = 0: the sample has no traffic information
    if flag == 0:
        strID = "('%d',"%ID
        str2 = "'%s','%s','%s');"%('','','')
        ToDB(str0 + strID + str1 + str2)
    return

#------------------------------------------------
# SQL execution
#------------------------------------------------
def ToDB(_str):
    """Execute and commit one SQL statement, then advance the row counter.

    _str -- the complete SQL statement.

    On a database error the transaction is rolled back and the error is
    re-raised. On success the global ID is incremented, a confirmation is
    printed for the current _MD5 and the global ReFlag is set to 1.
    """
    global ID
    global ReFlag   # without this declaration the assignment below was dead
    try:
        cur.execute(_str)
        conn.commit()
    except MySQLdb.Error:
        conn.rollback()
        raise
    ID = ID + 1
    addstr = " '%s' added"% _MD5
    print(addstr)
    ReFlag = 1
    return

#------------------------------------------------
#main()
#------------------------------------------------
def main():
    """Import every cuckoo analysis under the storage directory into MySQL.

    For each numeric analysis directory the report is read with ReadJSON()
    and the capture with ReadPcap(); captures of samples that produced
    traffic are copied to ./pcap/<md5>. A summary is printed at the end and
    the cursor and connection are closed even if an error occurs.
    """
    global ID
    global re
    global flag
    global ReFlag

    rootdir = '/root/cuckoo/storage/analyses/'
    Js = "reports/report.json"
    pcap = 'dump.pcap'
    if not os.path.exists("pcap"):
        os.mkdir("pcap")

    # Replace the placeholders with real settings. The port must be an
    # integer, not a quoted string; 3306 is the MySQL default.
    ConnectDB('ip', 'username', 'password', 'database', 3306)
    try:
        # None means the table is empty, so numbering starts at 1.
        max_id = SelectID()
        ID = 1 if max_id is None else int(max_id) + 1
        startID = ID

        # Same table as the one read by SelectID() and CountMD5(); inserting
        # elsewhere would make the ID and MD5 checks blind to new rows.
        str0 = "INSERT INTO samplesinfo3(ID,SampleSHA1, SampleMD5, SampleType, Yara, 360AV, Avira, ClamAV, Eset ,F_Secure, Kaspersky, Symantec,DNS_IP, IP_Port,Flow)values"
        CountMD5()

        # Only numeric entries are analyses; ids need not be contiguous.
        names = sorted((entry for entry in os.listdir(rootdir) if entry.isdigit()),
                       key=int)
        for name in names:
            file1 = os.path.join(rootdir, name, Js)
            file2 = os.path.join(rootdir, name, pcap)
            if not os.path.isfile(file1):
                print("report for analysis %s not found, skipped"%name)
                continue

            result = ReadJSON(file1)
            if result == 0:
                print("%s already exists!"%_MD5)
                re = re + 1
                continue

            ReadPcap(file2, str0, result)
            if flag == 1:
                pcapname = "pcap/%s"%_MD5
                if not os.path.exists(pcapname):
                    shutil.copy(file2,pcapname)
                else:
                    print("pcap '%s' exists"%_MD5)
            # Reset the per-sample state before the next analysis.
            flag = 0
            ReFlag = 0

        add = ID - startID
        print("%d items already exists!"%re)
        print("Successfully add %d items, from %d to %d ."%(add,startID,ID - 1))
    finally:
        cur.close()
        conn.close()

if __name__ == '__main__':
    main()

    sys.exit()