zoukankan      html  css  js  c++  java
  • shell 和 R 实现具有映射关系的数据的批量替换

    1、测试数据

    root@PC1:/home/test# ls
    5gene_CDs.bed  id_mapping.txt
    root@PC1:/home/test# head -n 3 5gene_CDs.bed
    chr6    117609654       117609965       NM_001378891.1_cds_0_0_chr6_117609655_r 0       -
    chr6    117622136       117622300       NM_001378891.1_cds_1_0_chr6_117622137_r 0       -
    chr6    117629956       117630091       NM_001378891.1_cds_2_0_chr6_117629957_r 0       -
    root@PC1:/home/test# head -n 3 id_mapping.txt   ## 在5gene_CDs.bed文件中实现id_mapping.txt中第二列对第一列的批量替换
    NM_001378891.1  ROS1
    NM_001378902.1  ROS1
    NM_002944.3     ROS1

    2、shell实现

    root@PC1:/home/test# ls
    5gene_CDs.bed  id_mapping.txt
    root@PC1:/home/test# head -n 3 5gene_CDs.bed
    chr6    117609654       117609965       NM_001378891.1_cds_0_0_chr6_117609655_r 0       -
    chr6    117622136       117622300       NM_001378891.1_cds_1_0_chr6_117622137_r 0       -
    chr6    117629956       117630091       NM_001378891.1_cds_2_0_chr6_117629957_r 0       -
    root@PC1:/home/test# head -n 3 id_mapping.txt
    NM_001378891.1  ROS1
    NM_001378902.1  ROS1
    NM_002944.3     ROS1
    root@PC1:/home/test# cp 5gene_CDs.bed 5gene_CDs.bed.bak  ## 下面将直接在源文件中修改,为防止意外发生,先对数据做备份
    root@PC1:/home/test# ls
    5gene_CDs.bed  5gene_CDs.bed.bak  id_mapping.txt
    root@PC1:/home/test# cat id_mapping.txt | while read {i,j}; do sed -i "s/$i/$j/" 5gene_CDs.bed; done   ## 循环中变量i和j分别存储id_mapping.txt每行的第一列(原ID)和第二列(用于替换的基因名)
    root@PC1:/home/test# head -n 3 5gene_CDs.bed
    chr6    117609654       117609965       ROS1_cds_0_0_chr6_117609655_r   0       -
    chr6    117622136       117622300       ROS1_cds_1_0_chr6_117622137_r   0       -
    chr6    117629956       117630091       ROS1_cds_2_0_chr6_117629957_r   0       -

    3、R实现

    > dir()
    [1] "5gene_CDs.bed"  "id_mapping.txt"
    > mapping=read.table("id_mapping.txt",sep="\t")
    > head(mapping,2)
                  V1   V2
    1 NM_001378891.1 ROS1
    2 NM_001378902.1 ROS1
    > bed=read.table("5gene_CDs.bed",sep="\t")
    > head(bed, 2)
        V1        V2        V3                                      V4 V5 V6
    1 chr6 117609654 117609965 NM_001378891.1_cds_0_0_chr6_117609655_r  0  -
    2 chr6 117622136 117622300 NM_001378891.1_cds_1_0_chr6_117622137_r  0  -
    > for (i in 1:nrow(mapping)) {
    +   bed$V4 <- sub(mapping$V1[i], mapping$V2[i], bed$V4)
    + }
    > head(bed,2)
        V1        V2        V3                            V4 V5 V6
    1 chr6 117609654 117609965 ROS1_cds_0_0_chr6_117609655_r  0  -
    2 chr6 117622136 117622300 ROS1_cds_1_0_chr6_117622137_r  0  -

    测试数据来源: https://mp.weixin.qq.com/s?__biz=MzI4ODE0NTE3OA==&mid=2649219537&idx=1&sn=6974c6e3b91b6e91ac407563f10d3835&chksm=f3d1aba4c4a622b243e58434d561e2a08db148fb98499eaec626f6e186ec5cfa912c6db5ab93&mpshare=1&scene=23&srcid=1118L4C0wtH1cYHmjtZD2lrk&sharer_sharetime=1637166249484&sharer_shareid=4ed060cc4cd1efce40e3ab6dd8d8c7d4#rd

  • 相关阅读:
    LeetCode 25 Reverse Nodes in k-Group
    圆桌派:家世背景对人的影响有多大
    BibTex 学习笔记
    R parallel包实现多线程1
    IIS学习笔记
    高效完成R代码
    圆桌派 :我们,朋友一生一起走
    高文欣个人简介
    R语言函数话学习笔记5
    git学习笔记1
  • 原文地址:https://www.cnblogs.com/liujiaxin2018/p/15578793.html
Copyright © 2011-2022 走看看