zoukankan      html  css  js  c++  java
  • R语言实现统计plink格式数据基因频率

    1、R脚本统计等位基因频率

    # Show the working directory contents so the input files can be checked.
    dir()
    # Read every field as text. With read.table()'s default type inference an
    # allele column that contains only "T" is parsed as logical TRUE, and the
    # allele would later be reported as "TRUE" instead of "T".
    dat <- read.table("outcome.ped", colClasses = "character")
    # Drop the six leading non-genotype columns; what remains is read as two
    # allele columns per locus.
    dat <- dat[, -(1:6), drop = FALSE]
    if (ncol(dat) == 0 || ncol(dat) %% 2 != 0) {
      stop("outcome.ped must contain two allele columns per locus after the ",
           "first six columns", call. = FALSE)
    }
    n_loci <- ncol(dat) / 2

    # For every locus, stack its two allele columns into one vector of length
    # 2 * nrow(dat), so each column of `loci` holds all allele copies of one
    # locus. Built in a single pass instead of growing a data frame with cbind().
    loci <- lapply(
      seq_len(n_loci),
      function(i) c(dat[[2 * i - 1]], dat[[2 * i]])
    )
    # Name the columns c1, c2, ... for however many loci the file contains
    # (previously hard-coded to exactly eight).
    names(loci) <- paste0("c", seq_len(n_loci))
    loci <- as.data.frame(loci, stringsAsFactors = FALSE)
    # Summarise the allele counts of one locus.
    #
    # alleles: vector with all allele copies of one locus; the code 0 marks a
    #          missing call and is excluded from the counts.
    # label:   locus name, used only in the error message.
    #
    # Returns a character vector of length four:
    #   c(minor allele, minor frequency, major allele, major frequency).
    # A locus with a single observed allele is reported as c(0, 0, allele, 1)
    # and a locus with no observed allele as c(0, 0, 0, 1).
    # Stops when more than two distinct non-missing alleles are present,
    # because the minor/major layout cannot describe such a locus.
    summarise_locus <- function(alleles, label) {
      counts <- table(alleles)
      counts <- counts[names(counts) != "0"]
      n_alleles <- length(counts)

      if (n_alleles == 0) {
        return(c("0", "0", "0", "1"))
      }
      if (n_alleles == 1) {
        return(c("0", "0", names(counts)[1], "1"))
      }
      if (n_alleles > 2) {
        stop("locus ", label, " has more than two non-missing alleles: ",
             paste(names(counts), collapse = ", "), call. = FALSE)
      }

      # Rarer allele first; on a tie the allele that sorts first is kept first.
      by_count <- order(as.vector(counts))
      allele <- names(counts)[by_count]
      freq <- as.vector(counts)[by_count] / sum(counts)
      c(allele[1], freq[1], allele[2], freq[2])
    }

    # One summary per locus, collected in a single pass (a 4 x n matrix) rather
    # than growing a data frame with rbind() inside a loop.
    result <- vapply(
      seq_along(loci),
      function(i) summarise_locus(loci[[i]], names(loci)[i]),
      character(4)
    )
    # One row per locus; all four columns are character at this point.
    result <- as.data.frame(t(result), stringsAsFactors = FALSE)
    colnames(result) <- c("minor", "minor_freq", "major", "major_freq")
    
    # Columns 2 and 4 hold the frequencies as text; convert them to numbers and
    # round to five decimal places.
    for (i in c(2, 4)) {
      result[, i] <- round(as.numeric(result[, i]), 5)
    }

    # Columns 1, 2 and 4 of the .map file are used as chromosome, SNP name and
    # position; column 3 is not used.
    map <- read.table("outcome.map")
    # Fail with a clear message when the two input files disagree instead of
    # leaving the mismatch to cbind().
    if (nrow(map) != nrow(result)) {
      stop("outcome.map has ", nrow(map), " rows but ", nrow(result),
           " loci were summarised", call. = FALSE)
    }
    result <- cbind(map[, c(1:2, 4)], result)
    colnames(result) <- c("chr", "snp", "pos", "minor", "freq", "major", "freq")
    result
    # Spell out FALSE: the shorthand F is an ordinary variable that can be
    # reassigned.
    write.csv(result, "result.csv", row.names = FALSE)
    dir()

    2、plink软件验证

    [root@centos79 test]# plink --file outcome --freq --sheep --out verify >/dev/null; rm *.log *.nosex
    [root@centos79 test]# ls
    outcome.map  outcome.ped  verify.frq
    [root@centos79 test]# cat verify.frq
     CHR  SNP   A1   A2          MAF  NCHROBS
       1 snp1    0    G            0       12
       1 snp2    G    C          0.2       10
       1 snp3    0    0           NA        0
       1 snp4    0    G            0        8
       1 snp5    A    G       0.3333       12
       1 snp6    A    T          0.5       12
       1 snp7    A    G       0.4167       12
       1 snp8    G    C       0.4167       12

  • 相关阅读:
    三、视频操作
    C# SendKeys使用方法介绍
    3.如已交60%档,现想交提高缴费档次该怎么办?
    四、答疑解惑
    C# 获取当前网页HTML
    (二)灵活就业人员养老保险和医疗保险
    字符编码(转)
    .NET跨页面传值的方法
    正则表达式之匹配关系(转)
    javascript对DOM的常用操作
  • 原文地址:https://www.cnblogs.com/liujiaxin2018/p/15486946.html
Copyright © 2011-2022 走看看