zoukankan      html  css  js  c++  java
  • 非模式物种的注释

    一句话:我们经常会面对非模式物种的GO或者KEGG富集与注释。

    1、载入我们所需要的包

    if("clusterProfiler" %in% rownames(installed.packages()) == FALSE) {source("http://bioconductor.org/biocLite.R");biocLite("clusterProfiler")}
    biocLite("colorspace")
    suppressMessages(library(clusterProfiler))
    ls("package:clusterProfiler")
    
    library(AnnotationHub)
    library(biomaRt)
    library(enrichGO)
    

    2、收索目标物种

    hub <- AnnotationHub()
    unique(hub$dataprovider)   #查看数据注释来源
    query(hub, "Apis cerana")
    

     

    3、抓取目标OrgDb

    Apis_cerana.OrgDb <- hub[["AH62635"]]
    columns(Apis_cerana.OrgDb)
    Apis_cerana.OrgDb
    

     

    4、了解这个Apis_cerana.OrgDb对象

    4.1、以symbol形式展示

    head(keys(Apis_cerana.OrgDb,keytype = "SYMBOL"),30) 
    

     

    4.2、可以查看gene类型

    keytypes(Apis_cerana.OrgDb)

    4.3、默认基因是以ENTREZID,例如查看前90个基因。

    keys(Apis_cerana.OrgDb)[1:50]

    4.4、格式转换。例如将上面前50个基因转换为SYMBOL格式对照

    bitr(keys(Apis_cerana.OrgDb)[1:50], 'ENTREZID', "SYMBOL", Apis_cerana.OrgDb)

    4.5、多个格式转化,例如将上面的50个基因转换为多个形式

    bitr(keys(Apis_cerana.OrgDb), 'ENTREZID', c("SYMBOL","REFSEQ", "GO", "ONTOLOGY"), Apis_cerana.OrgDb)

    5、测试

    5.1、取前100个基因集做测试

    sample_genes <- keys(Apis_cerana.OrgDb,keytype = "ENTREZID")[1:100]

    5.2、做GO富集分析

    enrich.go = enrichGO( gene         = sample_genes,
                          OrgDb        = Apis_cerana.OrgDb,
                          keyType      = 'SYMBOL',
                          ont= "ALL",
                          pAdjustMethod = 'BH',
                          pvalueCutoff = 0.05,
                          qvalueCutoff = 0.1,
                          readable     = T)
    head(enrich.go)dim(hainan_enrich.go)
    dim(hainan_enrich.go[hainan_enrich.go$ONTOLOGY=='BP',]) 
    dim(hainan_enrich.go[hainan_enrich.go$ONTOLOGY=='CC',]) 
    dim(hainan_enrich.go[hainan_enrich.go$ONTOLOGY=='MF',]) 
    

     

    画图

    barplot(hainan_enrich.go,showCategory=20,drop=T)
    dotplot(hainan_enrich.go,showCategory=50)
    write.table(as.data.frame(hainan_enrich.go), file="hainan_enrich_go.xls")
    

      

     5.3、画GO分类图

    5.3.1、重新取样,然后做GO富集分析

    rm(list=ls()) #重头开始
    sample_genes <- keys(Apis_cerana.OrgDb,keytype = "ENTREZID")[1:100]   #采样100个基因
    ego_ALL  =  enrichGO(         gene         =sample_genes,
                                 OrgDb        = Apis_cerana.OrgDb,
                                 keyType      = 'ENTREZID',
                                 ont= "ALL",
                                 pAdjustMethod = 'BH',
                                 pvalueCutoff =1,
                                 qvalueCutoff =1,
                                 readable     = T) 
    

     5.3.2、对富集结果进行分类

    ego_result_BP <- ego_ALL[ego_ALL$ONTOLOGY=='BP',]
    dim(ego_result_BP)
    
    ego_result_CC <- ego_ALL[ego_ALL$ONTOLOGY=='CC',]
    dim(ego_result_CC)
    
    ego_result_MF <- ego_ALL[ego_ALL$ONTOLOGY=='MF',]
    dim(ego_result_MF)
    

     构建数据框

    display_number = c(48, 26, 39)
    go_enrich_df <- data.frame(ID          = c(ego_result_BP$ID, ego_result_CC$ID, ego_result_MF$ID), 
                               Description = c(ego_result_BP$Description, ego_result_CC$Description, ego_result_MF$Description), 
                               GeneNumber  = c(ego_result_BP$Count, ego_result_CC$Count, ego_result_MF$Count), 
                               type        = factor(c(rep("biological process", display_number[1]), rep("cellular component", display_number[2]), rep("molecular function", display_number[3])), levels=c("molecular function", "cellular component", "biological process"))) 
    

     缩短名称

    ## numbers as data on x axis
    go_enrich_df$number <- factor(rev(1:nrow(go_enrich_df)))
    #shorten the names of GO terms 
    shorten_names <- function(x, n_word=4, n_char=40){ 
                   if (length(strsplit(x, " ")[[1]]) > n_word || (nchar(x) > 40)) 
                   { 
                     if (nchar(x) > 40) x <- substr(x, 1, 40) 
                     x <- paste(paste(strsplit(x, " ")[[1]][1:min(length(strsplit(x," ")[[1]]), n_word)], collapse=" "), "...", sep="") 
                     return(x) } 
                   else { return(x) } 
    }
    

     设置画图轴

    labels=(sapply(levels(go_enrich_df$Description)[as.numeric(go_enrich_df$Description)], shorten_names)) 
    names(labels) = rev(1:nrow(go_enrich_df))
    

     画图

    library(ggplot2) 
    CPCOLS <- c("#8DA1CB", "#FD8D62", "#66C3A5")
    library(ggplot2) 
    CPCOLS <- c("#8DA1CB", "#FD8D62", "#66C3A5")
    p <- ggplot(data=go_enrich_df, aes(x=number, y=GeneNumber, fill=type)) + 
                geom_bar(stat="identity", width=0.8) + 
                coord_flip() + 
                scale_fill_manual(values = CPCOLS) + 
                theme_bw() + 
                scale_x_discrete(labels=labels) + 
                xlab("GO term") + 
                theme(axis.text=element_text(face = "bold", color="gray50")) + 
                labs(title = "ALL Enriched GO Terms")
    p
    pdf("go_enrichment_of_sample_targets.pdf")
    dev.off()
    svg("go_enrichment_of_sample_targets.svg")
    p
    dev.off()
    

     参考文献

    https://blog.csdn.net/sinat_30623997/article/details/79250940https://blog.csdn.net/sinat_30623997/article/details/79250940

  • 相关阅读:
    springcloud有哪些特征
    可变参数
    递归
    增强的for循环
    Scanner对象
    注释
    Markdown常见的样式语法
    副本机制
    消费者分区分配策略
    SpringMVC 登陆判断
  • 原文地址:https://www.cnblogs.com/djx571/p/10271874.html
Copyright © 2011-2022 走看看