1、脚本(保存为 record.r)
#!/usr/bin/env bash
#
# Recode a PLINK text fileset (.ped + .map) into an additive 0/1/2 table,
# the same layout `plink --file <prefix> --recode A` writes to its .raw file.
#
# Usage:  bash record.r <file.ped> <file.map>
# Output: result.paw in the current directory.
#
# For every SNP the least frequent allele observed in the .ped file is the
# counted allele; each individual gets the number of copies (0, 1 or 2).
# Column headers are "<snp id>_<counted allele>", with the SNP id taken from
# the second column of the .map file.
#
# Everything is computed by a single awk program, so no scratch files are
# created in the working directory and stale files from an earlier run
# cannot leak into the result.

# -e is deliberately not set: every failure is handled explicitly, and the
# status of the final `main "$@"` call becomes the script's exit status.
set -uo pipefail

# Print a one-line usage summary to stderr.
usage() {
  printf 'Usage: %s <file.ped> <file.map>\n' "${0##*/}" >&2
}

#######################################
# Write the additive-coded table for one fileset to stdout.
# Arguments: $1 - .ped file (whitespace-delimited, 6 leading columns
#                 followed by two allele columns per SNP)
#            $2 - .map file (whitespace-delimited, SNP id in column 2)
# Outputs:   header line plus one line per individual on stdout;
#            diagnostics on stderr
# Returns:   0 on success, non-zero if a row has an unexpected column count
#######################################
recode_additive() {
  local ped=$1 map=$2

  # awk treats an operand of the form name=value as a variable assignment.
  # Anchoring relative paths with ./ keeps a file name that contains "="
  # (or is "-") from being misread.
  [[ $ped == /* ]] || ped=./$ped
  [[ $map == /* ]] || map=./$map

  # The .ped file is read twice: once to count alleles, once to emit rows.
  # All validation happens during the counting pass, i.e. before any output.
  LC_ALL=C awk '
    BEGIN { MISSING = "0" }   # PLINK default missing-genotype code

    function fail(msg) {
      printf "%s\n", msg > "/dev/stderr"
      exit 1
    }

    NF == 0 { next }          # tolerate blank lines in either file

    # Pass 1: collect SNP ids in file order.
    pass == "map" {
      if (NF < 2)
        fail(FILENAME ": line " FNR ": expected at least 2 columns")
      snp[++n_snp] = $2
      next
    }

    # Both .ped passes: each row must carry exactly two alleles per SNP.
    (NF - 6) != 2 * n_snp {
      fail(FILENAME ": line " FNR ": expected " (6 + 2 * n_snp) \
           " columns, found " NF)
    }

    # Pass 2: per-SNP allele frequencies; missing calls are not alleles.
    pass == "count" {
      for (k = 1; k <= n_snp; k++) {
        for (f = 5 + 2 * k; f <= 6 + 2 * k; f++) {
          a = $f ""           # force string so "1"/"2" codes compare as text
          if (a == MISSING)
            continue
          if (!((k, a) in freq))
            allele[k, ++n_allele[k]] = a
          freq[k, a]++
        }
      }
      next
    }

    # Pass 3: header once, then one recoded row per individual.
    pass == "emit" {
      if (!header_done) {
        line = "FID IID PAT MAT SEX PHENOTYPE"
        for (k = 1; k <= n_snp; k++) {
          # Least frequent allele; ties go to the smaller allele string.
          # A SNP with no called genotype keeps the missing code.
          # NOTE(review): a monomorphic SNP counts its only allele (every
          # called genotype scores 2); PLINK may report this case
          # differently - confirm before relying on it.
          best = MISSING
          for (j = 1; j <= n_allele[k]; j++) {
            a = allele[k, j]
            if (j == 1 || freq[k, a] < freq[k, best] ||
                (freq[k, a] == freq[k, best] && a < best))
              best = a
          }
          minor[k] = best
          line = line " " snp[k] "_" best
        }
        print line
        header_done = 1
      }

      line = $1 " " $2 " " $3 " " $4 " " $5 " " $6
      for (k = 1; k <= n_snp; k++) {
        a1 = $(5 + 2 * k) ""
        a2 = $(6 + 2 * k) ""
        if (a1 == MISSING || a2 == MISSING)
          dose = "NA"         # missing genotype, not "zero copies"
        else
          dose = (a1 == minor[k]) + (a2 == minor[k])
        line = line " " dose
      }
      print line
    }
  ' pass=map "$map" pass=count "$ped" pass=emit "$ped"
}

#######################################
# Entry point: validate arguments and write result.paw.
# Arguments: $1 - .ped file, $2 - .map file
# Returns:   0 on success, 2 on usage error, 1 on any other failure
#######################################
main() {
  if (( $# != 2 )); then
    usage
    return 2
  fi

  local ped=$1 map=$2 out=result.paw f
  for f in "$ped" "$map"; do
    if [[ ! -r $f ]]; then
      printf 'error: cannot read %s\n' "$f" >&2
      return 1
    fi
  done

  if ! recode_additive "$ped" "$map" > "$out"; then
    rm -f -- "$out"           # never leave a partial table behind
    return 1
  fi
}

main "$@"
用法:
root@PC1:/home/test/test/test2# ls
outcome.map outcome.ped record.r
root@PC1:/home/test/test/test2# bash record.r outcome.ped outcome.map
root@PC1:/home/test/test/test2# ls
outcome.map outcome.ped record.r result.paw
root@PC1:/home/test/test/test2# cat result.paw
FID IID PAT MAT SEX PHENOTYPE snp1_C snp2_G snp3_T snp4_A snp5_A snp6_G
DOR 1 0 0 0 -9 2 0 2 0 1 2
DOR 2 0 0 0 -9 1 1 0 0 0 2
DOR 3 0 0 0 -9 0 0 0 0 0 0
DOR 4 0 0 0 -9 0 0 0 0 0 0
DOR 5 0 0 0 -9 0 0 0 0 0 0
DOR 6 0 0 0 -9 0 0 0 0 0 0
DOR 7 0 0 0 -9 0 0 0 1 2 0
DOR 9 0 0 0 -9 0 0 0 1 2 0
root@PC1:/home/test/test/test2# cat outcome.ped
DOR 1 0 0 0 -9 C C C C T T G G A G G G
DOR 2 0 0 0 -9 C G G C G G G G G G G G
DOR 3 0 0 0 -9 G G C C G G G G G G A A
DOR 4 0 0 0 -9 G G C C G G G G G G A A
DOR 5 0 0 0 -9 G G C C G G G G G G A A
DOR 6 0 0 0 -9 G G C C G G G G G G A A
DOR 7 0 0 0 -9 G G C C G G A G A A A A
DOR 9 0 0 0 -9 G G C C G G A G A A A A
2、plink软件验证
root@PC1:/home/test/test/test2# ls
outcome.map outcome.ped record.r
root@PC1:/home/test/test/test2# bash record.r outcome.ped outcome.map
root@PC1:/home/test/test/test2# ls
outcome.map outcome.ped record.r result.paw
root@PC1:/home/test/test/test2# plink --file outcome --recode A --out temp > /dev/null; rm *log *.nosex
root@PC1:/home/test/test/test2# ls
outcome.map outcome.ped record.r result.paw temp.raw
root@PC1:/home/test/test/test2# cat temp.raw
FID IID PAT MAT SEX PHENOTYPE snp1_C snp2_G snp3_T snp4_A snp5_A snp6_G
DOR 1 0 0 0 -9 2 0 2 0 1 2
DOR 2 0 0 0 -9 1 1 0 0 0 2
DOR 3 0 0 0 -9 0 0 0 0 0 0
DOR 4 0 0 0 -9 0 0 0 0 0 0
DOR 5 0 0 0 -9 0 0 0 0 0 0
DOR 6 0 0 0 -9 0 0 0 0 0 0
DOR 7 0 0 0 -9 0 0 0 1 2 0
DOR 9 0 0 0 -9 0 0 0 1 2 0
root@PC1:/home/test/test/test2# cat result.paw
FID IID PAT MAT SEX PHENOTYPE snp1_C snp2_G snp3_T snp4_A snp5_A snp6_G
DOR 1 0 0 0 -9 2 0 2 0 1 2
DOR 2 0 0 0 -9 1 1 0 0 0 2
DOR 3 0 0 0 -9 0 0 0 0 0 0
DOR 4 0 0 0 -9 0 0 0 0 0 0
DOR 5 0 0 0 -9 0 0 0 0 0 0
DOR 6 0 0 0 -9 0 0 0 0 0 0
DOR 7 0 0 0 -9 0 0 0 1 2 0
DOR 9 0 0 0 -9 0 0 0 1 2 0
root@PC1:/home/test/test/test2# md5sum result.paw temp.raw
563fbde796e2d64dfc9c4570e71a925f result.paw
563fbde796e2d64dfc9c4570e71a925f temp.raw