zoukankan      html  css  js  c++  java
  • miRNA分析

    综述文章

    microRNA相关的数据库与预测、功能分析软件
    https://www.plob.org/article/1157.html

    miRNA测序概述及实验分析流程
    http://www.southgene.com/newsshow.php?cid=55&id=68

    示范 小RNA 项目结题报告
    http://www.biotrainee.com/jmzeng/html_report/d/e/e/p/i/n/SmallRNA_result/index.html

    标准数据

    流程样例 & 实用工具+FASTX-Toolkit (v0.014)

    FastQC (v0.11.8)
    mulitiQC(v1.6)
    miRDeep2(v2.0.0) (Anders和Huber,2010)
    DESeq(v1.34.0) (Friedlander等,2012)
    R(v3.5.1)

    #合并
     
    adapter="TCGATTCGTATGCCGTCTTCTGCTTG" 
    inputfile="exp.fa"
    index="/home/wj/data/gsm/hg"
    outmap=$inputfile".mapper.fa"
    outarf=$inputfile"_vs_genome.arf"
    pre="hairpin.human.fasta"
    mature="mature.human.fasta"
    str="miRNA.str"
    tail=$inputfile
    hgfile="Homo_sapiens.GRCh38.fa"
    type="hsa"
    mapper.pl $inputfile -c -v -i -j -m -k $adapter -l 18 -p $index -n -s $outmap -t $outarf -o 16
    quantifier.pl -p $pre -m $mature -r $outmap -t hsa -y $tail 
    miRDeep2.pl $outmap  $hgfile $outarf $mature $other $pre -t $type 
    
     
    
    # 1. index 
    
    bowtie-build hg.fa hg
    
    # 2. mapper
    
    mapper.pl 输入文件-e-h -i-j -m -k adapter -l 18 -p参考基因组的index -s处理过的reads输出的文件名-t输出文件名.arf
    
    # 3. count
    
    quantifier.pl -p前体序列参考文件.fa -m成熟序列参考文件.fa -r上一步处理过的reads输出文件-s从miRBase上下载的star序列文件-t物种名称-y文件名后缀
    
    # 4. 鉴定测序得到的未知miRNA
    
    miRDeep2.pl处理过的reads输出文件基因组文件处理过的输出文件名.arf成熟miRNA文件其他物种的成熟miRNA文件研究物种miRNA前体的文件-t物种名称2>report.log
    
    # 5.  miRNA表达矩阵以及差异表达分析
    
    
    
    
    

    程序

    <代码>

    #  "shell"## step1 : download raw data
    echo {14..19} |sed 's/ /
    /g' |while read id;  
    do fastq-dump SRR15427$id ;
    done
    ## step2 :  change sra data to fastq files## step3 : download the results from paper
    mkdir paper_results && cd paper_results
    wget ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE60nnn/GSE60292/suppl/GSE60292_RAW.tar
    ## tar xvf GSE60292_RAW.tar
    ls *gz |while read id ; do (echo $id;zcat $id | cut -f 2 |perl -alne '{$t+=$_;}END{print $t}');done
    ls *gz |xargs gunzip
    ## step4 : quality assessment
    ls *fastq |while read id
    do
    echo $id
    fastq_quality_filter -v -q 20 -p 80 -Q33  -i $id -o tmp ;
    fastx_trimmer -v -f 1 -l 27 -i tmp  -Q33 -z -o ${id%%.*}_clean.fq.gz ;
    done
    rm tmp
    ## discarded 12%~~49%%
    ls *_clean.fq.gz | while read id ; 
    do  fastqc $id;
    done
    mkdir QC_results
    mv *zip *html QC_results 
    ## step5 : alignment to miRBase v21 (hairpin.human.fa/mature.human.fa )
    pathdata="/home/wj/data"
    #### step5.1 using bowtie2 to do alignment
    mkdir  bowtie2_index &&  cd bowtie2_index
    bowtie2-build $pathdata/hairpin.human.fasta hairpin_human
    bowtie2-build $pathdata/mature.human.fasta  mature_human
    ls *_clean.fq.gz | while read id ; do  bowtie2 -x ./bowtie2_index/hairpin_human -U $id   -S ${id%%.*}.hairpin.sam ; done
    ls *_clean.fq.gz | while read id ; do  bowtie2 -x ./bowtie2_index/mature_human  -U $id   -S ${id%%.*}.mature.sam ; done
    ## overall alignment rate:  10.20% / 5.71%/ 10.18%/ 4.36% / 10.02% / 4.95%  (before convert U to T )## overall alignment rate:  51.77% / 70.38%/51.45% /61.14%/ 52.20% / 65.85% (after convert U to T )## overall alignment rate:  6.67% / 3.78% / 6.70% / 2.80%/ 6.55% / 3.23%    (before convert U to T )## overall alignment rate:  34.94% / 46.16%/ 35.00%/ 38.50% / 35.46% /42.41%(after convert U to T )#### step5.2 using SHRiMP to do alignment##    http://compbio.cs.toronto.edu/shrimp/README##    3.5 Mapping cDNA reads against a miRNA database 
    ##  We project the database with:
    pathdata="/home/wj/data"
    $SHRIMP_FOLDER/utils/project-db.py --shrimp-mode ls $pathdata/hairpin.human.fasta  $SHRIMP_FOLDER/bin/gmapper-ls -L  hairpin.human-ls SRR1542716.fastq  --qv-offset 33 -o 1 -H -E -a -1 -q -30 -g -30 --qv-offset 33 --strata -N 8  >map.out 2>map.log
    ## step6: counts the reads which mapping to each miRNA reference.## we need to exclude unmapped as well as multiple-mapped  reads## XS:i:<n> Alignment score for second-best alignment. Can be negative. Can be greater than 0 in --local mode## NM:i:1   ## NM i Edit distance to the reference, including ambiguous bases but excluding clipping#The following command exclude unmapped (-F 4) as well as multiple-mapped (grep -v “XS:”) reads#samtools view -F 4 input.bam | grep -v "XS:" | wc -l## 180466//1520320##cat >count.hairpin.sh
    ls *hairpin.sam  | while read id
    do
    samtools view  -SF 4 $id |perl -alne '{$h{$F[2]}++}END{print "$_	$h{$_}" foreach sort keys %h }'  > ${id%%_*}.hairpin.counts
    done
    
    ## bash count.hairpin.sh##cat >count.mature.sh
    ls *mature.sam  | while read id
    do
    samtools view  -SF 4 $id |perl -alne '{$h{$F[2]}++}END{print "$_	$h{$_}" foreach sort keys %h }'  > ${id%%_*}.mature.counts
    done
    ## bash count.mature.sh
    
    
    # R part 
    
    ## step8: differential expression analysis by R package for miRNA expression patterns:## 文章里面提到的结果是:## MicroRNA sequencing revealed over 250 known and 34 predicted novel miRNAs to be differentially expressed between ET-1 stimulated and unstimulated control hiPSC-CMs.## (FDR < 0.1 and 1.5 fold change)
    rm(list=ls())
    setwd('J:\miRNA_test\paper_results')  ##把从GEO里面下载的文献结果放在这里
    sampleIDs=c()
    groupList=c()
    allFiles=list.files(pattern = '.txt')
    i=allFiles[1]
    sampleID=strsplit(i,"_")[[1]][1]
    treat=strsplit(i,"_")[[1]][4]
    dat=read.table(i,stringsAsFactors = F)
    colnames(dat)=c('miRNA',sampleID)
    groupList=c(groupList,treat)
    for (i in allFiles[-1]){
    sampleID=strsplit(i,"_")[[1]][1]
    treat=strsplit(i,"_")[[1]][4]
    a=read.table(i,stringsAsFactors = F)
    colnames(a)=c('miRNA',sampleID)
    dat=merge(dat,a,by='miRNA')
    groupList=c(groupList,treat)
    }
    ### 上面的代码只是为了把6个独立的表达文件给合并成一个表达矩阵## we need to filter the low expression level miRNA
    exprSet=dat[,-1]
    rownames(exprSet)=dat[,1]
    suppressMessages(library(DESeq2))
    exprSet=ceiling(exprSet)
    (colData <- data.frame(row.names=colnames(exprSet), groupList=groupList))
    ## DESeq2就是这么简单的用
    dds <- DESeqDataSetFromMatrix(countData = exprSet,
    colData = colData,
    design = ~ groupList)
    dds <- DESeq(dds)
    png("qc_dispersions.png", 1000, 1000, pointsize=20)
    plotDispEsts(dds, main="Dispersion plot")
    dev.off()
    res <- results(dds)
    ## 画一些图,相当于做QC吧
    png("RAWvsNORM.png")
    rld <- rlogTransformation(dds)
    exprSet_new=assay(rld)
    par(cex = 0.7)
    n.sample=ncol(exprSet)
    if(n.sample>40) par(cex = 0.5)
    cols <- rainbow(n.sample*1.2)
    par(mfrow=c(2,2))
    boxplot(exprSet,  col = cols,main="expression value",las=2)
    boxplot(exprSet_new, col = cols,main="expression value",las=2)
    hist(exprSet[,1])
    hist(exprSet_new[,1])
    dev.off()library(RColorBrewer)
    (mycols <- brewer.pal(8, "Dark2")[1:length(unique(groupList))])
    # Sample distance heatmap
    sampleDists <- as.matrix(dist(t(exprSet_new)))
    #install.packages("gplots",repos = "http://cran.us.r-project.org")library(gplots)
    png("qc-heatmap-samples.png", w=1000, h=1000, pointsize=20)
    heatmap.2(as.matrix(sampleDists), key=F, trace="none",
    col=colorpanel(100, "black", "white"),
    ColSideColors=mycols[groupList], RowSideColors=mycols[groupList],
    margin=c(10, 10), main="Sample Distance Matrix")
    dev.off()png("MA.png")
    DESeq2::plotMA(res, main="DESeq2", ylim=c(-2,2))
    dev.off()
    ## 重点就是这里啦,得到了差异分析的结果
    resOrdered <- res[order(res$padj),]
    resOrdered=as.data.frame(resOrdered)
    write.csv(resOrdered,"deseq2.results.csv",quote = F)##下面也是一些图,主要是看看样本之间的差异情况library(limma)
    plotMDS(log(counts(dds, normalized=TRUE) + 1))
    plotMDS(log(counts(dds, normalized=TRUE) + 1) - log(t( t(assays(dds)[["mu"]]) / sizeFactors(dds) ) + 1))
    plotMDS( assays(dds)[["counts"]] )  ## raw count
    plotMDS( assays(dds)[["mu"]] ) ##- fitted values.
     
    

    <说明>

    exprSet

  • 相关阅读:
    5.2 TensorFlow:模型的加载,存储,实例
    5.2 TensorFlow:模型的加载,存储,实例
    卷积神经网络(cnn)的体系结构
    卷积神经网络(cnn)的体系结构
    Python学习笔记(四) 函数
    Python学习笔记(三) 字典
    Python学习笔记(二) 字符串
    Python学习笔记(一) 列表和元组
    Linux学习笔记(七) 查询系统
    Linux学习笔记(六) 进程管理
  • 原文地址:https://www.cnblogs.com/zwj-pot/p/14859434.html
Copyright © 2011-2022 走看看