/* Improved version: uses zlib so that gz-compressed files can be read; uncompressed files can also be processed directly. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <zlib.h>
/* Counter type used for the per-base totals. */
typedef unsigned long long u_llong;

/* Size, in bytes, of the buffer handed to gzgets() for each read. */
enum { buff = 1024 };
/*
 * Check the command-line argument count.
 *
 * num: the argument count (argc) as received by main().
 * str: the program name shown in the usage message; may be NULL.
 *
 * Returns normally when exactly one file argument was given (num == 2).
 * Otherwise prints a usage line to stderr and terminates the process with
 * EXIT_FAILURE, so calling scripts can detect the misuse.
 */
static void usage(int num,const char *str)
{
    if (num == 2) {
        return;
    }

    /* Passing NULL to %s is undefined behavior, so fall back to a placeholder. */
    fprintf(stderr, "usage: %s fqFile\n", str != NULL ? str : "<program>");
    exit(EXIT_FAILURE);
}
/*
 * Add the bases found in one chunk of sequence text to the running totals.
 *
 * base: NUL-terminated text, typically one line as returned by gzgets().
 *       It is only read, never modified. NULL is treated as empty.
 *
 * Returns a pointer to a static array of five counters in the order
 * G, A, C, T, N. The counters are shared static state: every call adds to
 * the same totals, and the returned pointer always refers to that array.
 *
 * Only the upper-case letters G, A, C, T and N are counted; every other
 * character (newline, carriage return, anything else) is ignored. Because
 * line terminators are simply not counted, nothing has to be stripped, and
 * a chunk without a trailing newline keeps its last base.
 */
static u_llong* gcN(char base[buff])
{
    static u_llong gactn[5] = {0, 0, 0, 0, 0};

    if (base == NULL) {
        return gactn;
    }

    /* Single pass up to the terminator; no repeated strlen() calls. */
    for (const char *cursor = base; *cursor != '\0'; cursor++) {
        switch (*cursor) {
        case 'G':
            gactn[0]++;
            break;
        case 'A':
            gactn[1]++;
            break;
        case 'C':
            gactn[2]++;
            break;
        case 'T':
            gactn[3]++;
            break;
        case 'N':
            gactn[4]++;
            break;
        default:
            break;
        }
    }
    return gactn;
}
static void calc(const char *fqfile)
{
//FILE *fq;
gzFile fq;
if((fq=gzopen(fqfile,"r")) == NULL)
{
perror("fopen");
exit(1);
}
//fprintf(stderr,"fq file <%s> open suceed!
",fqfile);
char base[buff];
char qual=0;
u_llong *p=NULL;
while((gzgets(fq,base,buff))!= NULL) // 这里用 gzgets 替代 fgets
{
if(base[0]=='@')
{
continue;
}
if(base[0]=='+')
{
qual=1;
continue;
}
if(qual==1)
{
qual=0;
continue;
}
p=gcN(base); // G A C T N
}
float GClevel;
u_llong sum=0;
for(int i=0; i<5; i++)
{
sum+=*(p+i);
}
GClevel=(float)(*p+*(p+2)) / sum * 100;
fprintf(stdout,"G:%lld A:%lld
C:%lld T:%lld
N:%lld sum:%lld
",*p,*(p+1),*(p+2),*(p+3),*(p+4),sum);
fprintf(stdout,"GC:%.2f%%
",GClevel);
}
/*
 * Program entry point: hands the argument count and program name to
 * usage(), then runs calc() on the first command-line argument and
 * finishes with status 0.
 */
int main(int argc,const char *argv[])
{
    const char *program_name = argv[0];
    usage(argc, program_name);

    /* argv[1] is read only after usage() has returned. */
    const char *fastq_path = argv[1];
    calc(fastq_path);

    return 0;
}
/* Note: when compiling with gcc, remember to add the -lz option to link against zlib. */