这是跟着学校老师做流量识别时写的程序,能把PCAP或ERF文件中的数据包按流分组、排序后生成新的文件。思路比较乱,而且好像还有bug:分析一个114MB的PCAP文件没有问题,但分析一个2.7GB的大数据集时,写入新文件就出了问题,生成的文件用Wireshark打不开。有空再改。
#include <winsock2.h>
#include <omp.h>
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <list>
#include <map>
#include <vector>
using namespace std;
// Input trace formats; g_tracetype holds one of these values.
#define TRACE_PCAP 1
#define TRACE_ERF 2
// Link-layer types compared against g_linktype (which ReadFlowFile loads from
// the pcap file header's linktype field).
#define LINK_ETH 0x00000001
#define LINK_11C 0x0000000B
// Link-layer protocol value (after ntohs) that marks an IP packet.
#define PROT_IP 0x0800
// IP protocol numbers compared against ip_header::ipProtocol.
#define PROT_TCP 0x06
#define PROT_UDP 0x11
#define PROT_ICMP 0x01 // not referenced in the visible part of this file
// Header sizes in bytes; none of these four is referenced in the visible part of this file.
#define PCAP_HDR_LEN 16
#define ERF_HDR_LEN 18 // per the original note, this count includes the "eth protocol header" bytes
#define ETH_HDR_LEN 14
#define C11_HDR_LEN 8
// NOTE(review): expands to an unparenthesized sum (WM_USER + 100), so it is
// only safe where operator precedence cannot split it. Not referenced in the
// visible part of this file.
#define UM_FLOW WM_USER + 100
// Size of the local scratch buffer declared in ConvergeFlows.
#define BUFF_LEN 204800
// The next two are not referenced in the visible part of this file.
#define HASH_LEN 2000000
#define MICE_FLOW_PKTS 4
// Per-thread parameters: a thread id and a start/end pair of LARGE_INTEGER
// values. Not referenced in the visible part of this file.
struct thread_param
{
u_short thread_id; // identifier of the thread
LARGE_INTEGER fp_start; // presumably the file offset where this thread's range starts -- TODO confirm
LARGE_INTEGER fp_end; // presumably the file offset where this thread's range ends -- TODO confirm
};
// Ethernet frame header. NextTransPacket overlays this on the start of the
// captured bytes when g_linktype is not LINK_11C and reads only h_proto.
// The member order mirrors the byte layout of the frame; do not reorder.
struct eth_header
{
unsigned char h_dest[6]; /* destination Ethernet address */
unsigned char h_source[6]; /* source Ethernet address */
unsigned short h_proto; /* packet type ID field; stored as captured, callers apply ntohs */
};
// 8-byte link-layer header used when g_linktype == LINK_11C.
// NextTransPacket overlays it on the start of the captured bytes and reads
// only h_proto; the leading six bytes are skipped without interpretation.
struct c11_header
{
unsigned char h_unkown[6]; /* uninterpreted leading bytes (member name is spelled this way in the code) */
unsigned short h_proto; /* packet type ID field; stored as captured, callers apply ntohs */
};
// IPv4 header without options, overlaid directly on captured packet bytes.
// Multi-byte fields keep the byte order found in the capture; NextTransPacket
// applies ntohs to ipLength and copies the addresses unchanged.
// NOTE(review): the overlay only matches the 20-byte wire header where
// unsigned long is 4 bytes wide, as on the Windows toolchains this file
// targets (it includes winsock2.h).
struct ip_header
{
unsigned char iphVerLen; // Version (high nibble) and header length in 32-bit words (low nibble)
unsigned char ipTOS; // Type of service
unsigned short ipLength; // Total datagram length
unsigned short ipID; // Identification
unsigned short ipFlags; // Flags
unsigned char ipTTL; // Time to live
unsigned char ipProtocol; // Protocol (compared against PROT_TCP / PROT_UDP)
unsigned short ipChecksum; // Header checksum
unsigned long ipSource; // Source address
unsigned long ipDestination; // Destination address
};
// Fixed part of a TCP header, overlaid directly on captured packet bytes.
// NextTransPacket reads only the two port fields (through ntohs) and uses
// sizeof(tcp_header) as the transport header size when computing pkt_len.
// NOTE(review): the overlay only matches the wire layout where unsigned long
// is 4 bytes wide, as on the Windows toolchains this file targets.
struct tcp_header
{
unsigned short sourcePort; // Source port
unsigned short destinationPort; // Destination port
unsigned long sequenceNumber; // Sequence number
unsigned long acknowledgeNumber; // Acknowledgement number
unsigned char dataoffset; // Pointer to data
unsigned char flags; // Flags
unsigned short windows; // Size of window
unsigned short checksum; // Total checksum
unsigned short urgentPointer; // Urgent pointer
};
// UDP header, overlaid directly on captured packet bytes. NextTransPacket
// reads only the two port fields (through ntohs) and uses sizeof(udp_header)
// as the transport header size when computing pkt_len.
struct udp_header
{
unsigned short sourcePort; // Source port
unsigned short destinationPort; // Destination port
unsigned short len; // Total length
unsigned short checksum; // Total checksum
};
// File header found at the start of a pcap trace. ReadFlowFile reads it into
// g_fhdr and takes the link type from it; ConvergeFlows writes g_fhdr back
// unchanged at the start of the output file. Only linktype is interpreted by
// the visible code; the other fields are carried through untouched.
// NOTE(review): field names appear to follow libpcap's file header -- confirm
// the layout against the pcap file format before changing any type.
struct pcap_file_header
{
unsigned long magic;
unsigned short version_major;
unsigned short version_minor;
unsigned long thiszone;
unsigned long sigfigs;
unsigned long snaplen;
unsigned long linktype; // copied into g_linktype by ReadFlowFile
};
// Per-packet header that precedes each packet in a pcap trace. NextTransPacket
// reads it straight from the file and uses the values without byte swapping.
// NOTE(review): reading it with sizeof(pcap_pkthdr) only matches a 16-byte
// on-disk header (see PCAP_HDR_LEN) where unsigned long is 4 bytes wide.
struct pcap_pkthdr
{
unsigned long sec; // timestamp, seconds part (copied to trns_pkt_info::sec)
unsigned long usec; // timestamp, sub-second part (copied to trns_pkt_info::usec)
unsigned long cap_len; // number of packet bytes that follow this header in the file
unsigned long len; // not read by the visible code; presumably the original packet length -- TODO confirm
};
// Per-record header of an ERF trace. NextTransPacket reads it straight from
// the file, converts sec/usec with ntohl and rlen with ntohs, and then skips
// two more bytes before the link-layer frame.
// NOTE(review): confirm against the ERF specification that ntohl is the right
// conversion for the two timestamp words.
struct erf_pkthdr
{
unsigned long usec; // first timestamp word (copied to trns_pkt_info::usec after ntohl)
unsigned long sec; // second timestamp word (copied to trns_pkt_info::sec after ntohl)
unsigned char type; // not read by the visible code
unsigned char flags; // not read by the visible code
unsigned short rlen; // total record length, this header included
unsigned short lctr; // not read by the visible code
unsigned short wlen; // not read by the visible code
};
// Packet summary with textual address and protocol fields.
// Not referenced in the visible part of this file.
struct db_pkt
{
unsigned long sec; // timestamp, seconds part
unsigned long usec; // timestamp, sub-second part
char src_ip[20]; // source address as text
char dst_ip[20]; // destination address as text
unsigned short src_port; // source port
unsigned short dst_port; // destination port
char trs_prot[8]; // transport protocol as text
unsigned short pkt_len; // packet length
};
// One parsed packet as produced by NextTransPacket and stored in g_flowmap.
// The struct has no constructor or destructor: copies are shallow, so every
// copy shares the same buff pointer and nothing frees it automatically.
struct trns_pkt_info
{
unsigned long sec; // timestamp, seconds part
unsigned long usec; // timestamp, sub-second part
unsigned long src_ip; // Source address, copied from the IP header without byte swapping
unsigned long dst_ip; // Destination address, copied from the IP header without byte swapping
unsigned short src_port; // Source port, host byte order (0 when not TCP/UDP)
unsigned short dst_port; // Destination port, host byte order (0 when not TCP/UDP)
unsigned char trs_prot; // IP protocol number from the IP header
unsigned short pkt_len; // IP total length minus the IP header length and the fixed TCP/UDP header size
unsigned long len; // number of bytes held in buff
char* buff; // malloc'ed copy of the whole trace record (capture header + packet); written verbatim by ConvergeFlows
};
// Summary record for one flow. Not referenced in the visible part of this file.
struct flow_info
{
unsigned long id; // flow identifier
unsigned long src_ip; // Source address
unsigned long dst_ip; // Destination address
unsigned short src_port; // Source port
unsigned short dst_port; // Destination port
unsigned long n_pkt; // Number of packets
unsigned int f_start; // start position in the flow file
unsigned int f_end; // end position in the flow file
};
// Three-way comparison of two IPv4 addresses held in unsigned long values.
// The low 32 bits are compared one octet at a time, starting with the least
// significant byte of the stored value and moving upwards; the first octet
// that differs decides the result.
// Returns -1 when ip1 orders first, 1 when ip2 orders first, 0 when all four
// octets are equal.
int ipcmp(const unsigned long& ip1,const unsigned long& ip2)
{
    for (int shift = 0; shift < 32; shift += 8)
    {
        const unsigned long lhs_octet = (ip1 >> shift) & 0xff;
        const unsigned long rhs_octet = (ip2 >> shift) & 0xff;
        if (lhs_octet != rhs_octet)
            return (lhs_octet < rhs_octet) ? -1 : 1;
    }
    return 0;
}
// Key identifying one bidirectional flow: two endpoint addresses and their
// ports. ReadFlowFile fills it so that ip1/port1 belong to the endpoint that
// ipcmp orders first, which makes both directions of a conversation map to
// the same key.
struct session
{
    unsigned long ip1;
    unsigned long ip2; // ip1 never orders after ip2 under ipcmp
    unsigned short port1;
    unsigned short port2;

    // Strict weak ordering used by std::map: compare ip1, then ip2 (both with
    // ipcmp's octet-wise ordering rather than plain numeric order), then
    // port1, then port2.
    bool operator <(const session& x) const
    {
        const int by_ip1 = ipcmp(ip1, x.ip1);
        if (by_ip1 != 0) return by_ip1 < 0;

        const int by_ip2 = ipcmp(ip2, x.ip2);
        if (by_ip2 != 0) return by_ip2 < 0;

        if (port1 != x.port1) return port1 < x.port1;
        return port2 < x.port2;
    }
};
// All packets read from the trace, grouped by session key. Iterating the map
// visits the flows in session::operator< order; each list keeps its packets
// in the order they were read.
map<session,list<trns_pkt_info> >g_flowmap;
// Path of the input trace opened by ReadFlowFile; ConvergeFlows also uses it
// as the prefix of the output file name.
char g_tracefile[MAX_PATH] = "1.pcap";
// Format of the input trace: TRACE_PCAP or TRACE_ERF.
int g_tracetype = TRACE_PCAP;
// Link-layer type of the trace; for pcap input ReadFlowFile overwrites it
// with the linktype field of the file header.
u_short g_linktype = LINK_ETH;
// Handle of the input trace; opened and closed by ReadFlowFile.
FILE *fp_trace = NULL;
// pcap file header read from the input and written back by ConvergeFlows.
pcap_file_header g_fhdr;
// Reads the next record from the trace and parses its link/IP/transport
// headers.
//
// pkt      - receives the parsed fields. It is zeroed first. On success
//            pkt.buff points to a malloc'ed copy of the complete record
//            (capture header, the two ERF padding bytes if any, and the
//            captured packet) and pkt.len is its size. The caller owns
//            pkt.buff and must free() it.
// fp_trace - input stream positioned at the start of a record. On success it
//            is left at the start of the following record.
//
// Returns the record length in bytes, or 0 at end of file, on a short read,
// on an implausible length field, on allocation failure, or when g_tracetype
// is neither TRACE_PCAP nor TRACE_ERF. On a 0 return pkt.buff is NULL.
//
// Packets that are not IP, or that are too short for the headers they claim
// to carry, are still returned, with the unparsed fields left at 0
// (src_ip == 0 is what ReadFlowFile uses to skip them).
//
// The record is assembled from the bytes already read, so no backwards seek
// is needed.
long NextTransPacket(trns_pkt_info &pkt, FILE *fp_trace)
{
    // Sanity bound on a single captured packet, so that a corrupt length
    // field cannot trigger a huge allocation or overflow the return value.
    const long MAX_CAP_LEN = 16L * 1024L * 1024L;
    // Protocol value (after ntohs) of an 802.1Q VLAN tag and the tag's size.
    const u_short VLAN_TAG_PROT = 0x8100;
    const long VLAN_TAG_LEN = 4;

    memset(&pkt, 0, sizeof(pkt));
    if (!fp_trace) return 0;
    if (feof(fp_trace)) return 0;

    struct pcap_pkthdr pkthdrp;
    struct erf_pkthdr pkthdre;
    char erf_pad[2];            // the two bytes between the ERF header and the frame
    const void *rec_hdr = NULL; // capture header bytes to copy into the record
    long hdr_len = 0;
    long pad_len = 0;
    long cap_len = 0;           // captured packet bytes, capture header excluded

    if (g_tracetype == TRACE_PCAP)
    {
        if (fread(&pkthdrp, 1, sizeof(pkthdrp), fp_trace) < sizeof(pkthdrp))
            return 0;
        // pcap header fields are used as stored in the file (no byte swap).
        if (pkthdrp.cap_len > static_cast<unsigned long>(MAX_CAP_LEN))
            return 0;
        cap_len = static_cast<long>(pkthdrp.cap_len);
        pkt.sec = pkthdrp.sec;
        pkt.usec = pkthdrp.usec;
        rec_hdr = &pkthdrp;
        hdr_len = static_cast<long>(sizeof(pkthdrp));
    }
    else if (g_tracetype == TRACE_ERF)
    {
        if (fread(&pkthdre, 1, sizeof(pkthdre), fp_trace) < sizeof(pkthdre))
            return 0;
        // These two bytes used to be skipped with fseek; they are read here
        // so they can be copied into the stored record.
        if (fread(erf_pad, 1, sizeof(erf_pad), fp_trace) < sizeof(erf_pad))
            return 0;
        rec_hdr = &pkthdre;
        hdr_len = static_cast<long>(sizeof(pkthdre));
        pad_len = static_cast<long>(sizeof(erf_pad));
        // rlen counts the whole record, header included, and is converted
        // with ntohs.
        cap_len = static_cast<long>(ntohs(pkthdre.rlen)) - hdr_len - pad_len;
        // NOTE(review): the timestamp words are converted with ntohl exactly
        // as before; confirm this matches the ERF timestamp encoding.
        pkt.sec = ntohl(pkthdre.sec);
        pkt.usec = ntohl(pkthdre.usec);
    }
    else
    {
        return 0; // unknown trace type: the record length cannot be determined
    }

    if (cap_len < 0 || cap_len > MAX_CAP_LEN)
        return 0;

    const long res = hdr_len + pad_len + cap_len;
    char *record = static_cast<char *>(malloc(static_cast<size_t>(res)));
    if (!record)
        return 0;
    memcpy(record, rec_hdr, static_cast<size_t>(hdr_len));
    if (pad_len > 0)
        memcpy(record + hdr_len, erf_pad, static_cast<size_t>(pad_len));

    char *payload = record + hdr_len + pad_len;
    if (fread(payload, 1, static_cast<size_t>(cap_len), fp_trace) < static_cast<size_t>(cap_len))
    {
        free(record); // truncated record: nothing is handed to the caller
        return 0;
    }
    pkt.buff = record;
    pkt.len = res;

    // Header parsing. Each header is copied into a local struct only after
    // checking that the captured bytes are long enough, so a short packet is
    // never read past its end.
    const char *cursor = payload;
    long remaining = cap_len;
    u_short link_prot = 0;
    long link_hdr_len = 0;

    if (g_linktype == LINK_11C)
    {
        struct c11_header c11h;
        link_hdr_len = static_cast<long>(sizeof(c11h));
        if (remaining < link_hdr_len) return res;
        memcpy(&c11h, cursor, sizeof(c11h));
        link_prot = ntohs(c11h.h_proto);
    }
    else
    {
        struct eth_header ethh;
        link_hdr_len = static_cast<long>(sizeof(ethh));
        if (remaining < link_hdr_len) return res;
        memcpy(&ethh, cursor, sizeof(ethh));
        link_prot = ntohs(ethh.h_proto);
        if (link_prot == VLAN_TAG_PROT)
        {
            // Shift the overlay by the tag size so h_proto lands on the
            // protocol field that follows the VLAN tag.
            if (remaining < VLAN_TAG_LEN + link_hdr_len) return res;
            cursor += VLAN_TAG_LEN;
            remaining -= VLAN_TAG_LEN;
            memcpy(&ethh, cursor, sizeof(ethh));
            link_prot = ntohs(ethh.h_proto);
        }
    }

    if (link_prot != PROT_IP)
        return res;

    cursor += link_hdr_len;
    remaining -= link_hdr_len;

    struct ip_header iph;
    if (remaining < static_cast<long>(sizeof(iph))) return res;
    memcpy(&iph, cursor, sizeof(iph));

    const unsigned int ip_hdr_len = (iph.iphVerLen & 0x0F) * 4;
    pkt.src_ip = iph.ipSource;       // addresses are kept as found in the packet
    pkt.dst_ip = iph.ipDestination;
    pkt.trs_prot = iph.ipProtocol;

    // A header length below the fixed IP header, or beyond the captured
    // bytes, means the transport header cannot be located.
    if (ip_hdr_len < sizeof(iph) || remaining < static_cast<long>(ip_hdr_len))
        return res;
    cursor += ip_hdr_len;
    remaining -= static_cast<long>(ip_hdr_len);

    if (iph.ipProtocol == PROT_TCP)
    {
        struct tcp_header tcph;
        if (remaining < static_cast<long>(sizeof(tcph))) return res;
        memcpy(&tcph, cursor, sizeof(tcph));
        pkt.src_port = ntohs(tcph.sourcePort);
        pkt.dst_port = ntohs(tcph.destinationPort);
        // Subtracts the fixed TCP header size only; TCP options are not
        // accounted for (unchanged from the previous behaviour).
        pkt.pkt_len = static_cast<unsigned short>(ntohs(iph.ipLength) - ip_hdr_len - sizeof(struct tcp_header));
    }
    else if (iph.ipProtocol == PROT_UDP)
    {
        struct udp_header udph;
        if (remaining < static_cast<long>(sizeof(udph))) return res;
        memcpy(&udph, cursor, sizeof(udph));
        pkt.src_port = ntohs(udph.sourcePort);
        pkt.dst_port = ntohs(udph.destinationPort);
        pkt.pkt_len = static_cast<unsigned short>(ntohs(iph.ipLength) - ip_hdr_len - sizeof(struct udp_header));
    }

    return res;
}
void ReadFlowFile()
{
fp_trace=fopen(g_tracefile,"rb");
if (!fp_trace)return;
if (g_tracetype == TRACE_PCAP)
{
fread((void *)&g_fhdr, 1, sizeof(pcap_file_header), fp_trace);
g_linktype = g_fhdr.linktype;
}
trns_pkt_info pkt;
long res;
session tmp;
while(res=NextTransPacket(pkt,fp_trace))
{
if(pkt.src_ip==0||pkt.dst_ip==0) continue;
pkt.buff=(char *)malloc(res);
fseek(fp_trace,-res,SEEK_CUR);
fread(pkt.buff,1,res,fp_trace);
pkt.len=res;
if(ipcmp(pkt.src_ip,pkt.dst_ip)<0)
//if(pkt.src_ip<pkt.dst_ip)
{
tmp.ip1=pkt.src_ip;
tmp.ip2=pkt.dst_ip;
tmp.port1=pkt.src_port;
tmp.port2=pkt.dst_port;
}
else
{
tmp.ip1=pkt.dst_ip;
tmp.ip2=pkt.src_ip;
tmp.port1=pkt.dst_port;
tmp.port2=pkt.src_port;
}
if(!g_flowmap.count(tmp))
{
list<trns_pkt_info> li;
li.push_back(pkt);
g_flowmap[tmp]=li;
}
else
{
g_flowmap[tmp].push_back(pkt);
pkt.buff=NULL;
}
}
fclose(fp_trace);
fp_trace=NULL;
}
void ShowAllFlows()
{
session tmp;
in_addr addr1,addr2;
printf("共有%d组数据
",g_flowmap.size());
system("PAUSE");
map<session,list<trns_pkt_info> >::iterator it;
int i;
for(it=g_flowmap.begin(),i=1;it!=g_flowmap.end();it++,i++)
{
tmp=it->first;
printf("
----------------------------------
");
printf("第%d组数据:
",i);
memcpy(&addr1,&tmp.ip1,4);
memcpy(&addr2,&tmp.ip2,4);
printf("ip1:%s port1:%d
",inet_ntoa(addr1),tmp.port1);
printf("ip2:%s port2:%d
",inet_ntoa(addr2),tmp.port2);
printf("共有%d个包:
",it->second.size());
for(list<trns_pkt_info>::iterator j=it->second.begin();j!=it->second.end();j++)
{
memcpy(&addr1,&j->src_ip,4);
memcpy(&addr2,&j->dst_ip,4);
printf("src:%s port:%d ",inet_ntoa(addr1),j->src_port);
printf("dst:%s port:%d
",inet_ntoa(addr2),j->dst_port);
}
printf("----------------------------------
");
system("PAUSE");
}
}
void ConvergeFlows()
{
char pkt_fname[MAX_PATH];
char buff[BUFF_LEN];
if(g_tracetype == TRACE_PCAP)
sprintf(pkt_fname, "%s.pkt.pcap", g_tracefile);
else
sprintf(pkt_fname, "%s.pkt.erf", g_tracefile);
FILE *fp_pkt = fopen(pkt_fname, "wb");
if (g_tracetype == TRACE_PCAP)
{
struct pcap_pkthdr pkthdrp;
fwrite((void *)&g_fhdr, 1, sizeof(struct pcap_file_header), fp_pkt);
}
map<session,list<trns_pkt_info> >::iterator it;
trns_pkt_info pkt;
for(it=g_flowmap.begin();it!=g_flowmap.end();it++)
{
for(list<trns_pkt_info>::iterator j=it->second.begin();j!=it->second.end();j++)
{
pkt=*j;
fwrite(pkt.buff, sizeof(char), pkt.len, fp_pkt);
}
}
fclose(fp_pkt);
}
// Program entry point: load the trace named by g_tracefile into g_flowmap,
// then write the packets back out grouped by flow.
int main()
{
    ReadFlowFile();
    // ShowAllFlows();   // optional interactive dump of the grouped flows
    ConvergeFlows();
    return 0;
}