zoukankan      html  css  js  c++  java
  • 发表级ggplot绘图流程和技巧 | 论文发表

    The R Graph Gallery - R绘图代码库

    themes - https://www.r-graph-gallery.com/ggplot2-package.html#themes

    要开始修饰以前的核心图片,准备发表论文了。

    把之前比较raw的图修饰格式,统一生成高清晰图片,准备放入paper中。

    会慢慢补充所有常见的绘图代码。

    一个raw image的代码:

    # Raw, unpolished figure: one box per group with the individual scores
    # jittered on top; colour follows the group and only a title is set.
    p1 <- ggplot(
      data = oxidation.df,
      mapping = aes(x = group, y = score, color = group)
    ) +
      geom_boxplot() +
      geom_jitter(position = position_jitter(0.2), shape = 16) +
      labs(title = "Fatty acid metabolism")
    p1
    

      

    第一步:修改df里的标签

    # Rename the group labels for publication: each value listed in `from` is
    # replaced by the value at the same position in `to`.
    oxidation.df$group <- plyr::mapvalues(oxidation.df$group, 
                                          from = c("GFP- early","GFP+ early","GFP- late","GFP+ late"),
                                          to = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))
    

      

    第二步:修改标签顺序

    # Convert `group` to a factor with an explicit level order (OFF/ON early,
    # then OFF/ON late) instead of the default alphabetical order.
    oxidation.df$group <- factor(oxidation.df$group, levels = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))
    

      

    第三步:精修格式主题字体

    主题

    常用的主题:https://www.r-graph-gallery.com/ggplot2-package.html#themes

    • theme_bw - 去掉了灰白背景,加了边框,最常用
    • theme_classic - 只留下了加粗的左下边框,最经典,适合实验图
    • egg::theme_article - 只有四周的边框,最适合发表文章,缺点:图例间隔太小
    • theme_minimal - 只留下了grid,没有边框
    • theme_minimal_hgrid - 只留下了hgrid
    • theme_void - 只留下了图例,适合tSNE图
    # Complete themes: add one of these to a plot, e.g. `p1 + theme_bw()`.
    theme_bw()
    theme_void()
    # Remove the grid by blanking both the major and the minor grid lines.
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
    

     

    修改title

    # Empty x-axis title, a y-axis title and a plot title. The y-axis string
    # contains a literal line break (it continues on the next source line).
    labs(x = "", y = "Pathway score
    ", title = "Fatty acid metabolism")

    xy轴标签字体大小

    # Axis text styling: x tick labels are plain, black, size 14 and rotated by
    # 30 degrees (vjust = 0.6); y tick labels are size 10; y-axis title is size 14.
    theme(axis.text.x  = element_text(face="plain", angle=30, size = 14, color = "black", vjust=0.6),
            axis.text.y  = element_text(size = 10),
            axis.title.y = element_text(size = 14))

    去掉多余的图例

    # Hide the legend entirely.
    theme(legend.position = "none")

    填充颜色

    # RColorBrewer provides brewer.pal(); 9 colours of "Paired" are requested.
    library(RColorBrewer)
    # Fill scale: use all 9 colours.
    scale_fill_manual(values=brewer.pal(9,"Paired"))
    # Colour scale: use only the 3rd to 6th colours of the same palette.
    scale_color_manual(values=brewer.pal(9,"Paired")[c(3,4,5,6)])

    其他

    限制xy坐标范围

    # Restrict both axes to the range 0 to 2.5.
    # NOTE(review): per the ggplot2 documentation, scale limits replace
    # out-of-range values with NA before statistics are computed; to zoom
    # without dropping data use coord_cartesian(xlim = , ylim = ) instead.
    scale_x_continuous(limits = c(0,2.5))
    scale_y_continuous(limits = c(0,2.5))
    

      

    一组数据的比较

    # ggpubr provides stat_compare_means().
    library(ggpubr)
    # Add one test label at x = 1, y = 2.3 with text size 5. No `method` is
    # given, so the function's default test is used.
    stat_compare_means(label.y = 2.3, label.x = 1, size=5)
    

      

    多组数据的比较

    # ggpubr provides stat_compare_means().
    library(ggpubr)
    # Pairs of groups to test against each other (OFF vs ON within each stage).
    my_comparisons <- list(c("HhOFF early", "HhON early"), c("HhOFF late", "HhON late"))
    
    # Layers to add to an existing plot with `+`.
    stat_compare_means(method = "anova", label.y = 1.29) + # global
    stat_compare_means(comparisons = my_comparisons, label.y = 1, label = "p.signif") + # paired
    # Zoom with coord_cartesian() instead of scale limits: scale limits turn
    # out-of-range observations into NA before the tests above are computed,
    # which would silently change the reported p-values.
    coord_cartesian(ylim = c(-0.52, 1.3))
    

      

    代码汇总

    # ---- Full recipe: relabel groups, fix their order, test and plot ----
    # Rename each raw label in `from` to the label at the same position in `to`.
    tmp$group <- plyr::mapvalues(tmp$group, from = c("GFP- early","GFP+ early","GFP- late","GFP+ late"),
                                          to = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))
    
    # Explicit level order instead of the default alphabetical order.
    tmp$group <- factor(tmp$group, levels = c("HhOFF early", "HhON early", "HhOFF late", "HhON late"))
    
    library(ggpubr)
    # Pairs of groups to test against each other (OFF vs ON within each stage).
    my_comparisons <- list(c("HhOFF early", "HhON early"), c("HhOFF late", "HhON late"))
    
    options(repr.plot.width=4, repr.plot.height=4)
    p1 <- ggplot(tmp, aes(x=group, y=score, color=group)) + 
            # Every observation is drawn by geom_jitter() below, so the boxplot's
            # own outlier points are hidden to avoid plotting them twice.
            geom_boxplot(outlier.shape = NA) + 
            theme_bw() +
            labs(x = "", y = "Pathway score
    ", title = "Fatty acid metabolism") +
            # height = 0: jitter horizontally only, never alter the plotted scores.
            geom_jitter(shape=16, position=position_jitter(width = 0.2, height = 0)) +
            theme(legend.position = "none") + 
            theme(axis.text.x  = element_text(face="plain", angle=30, size = 14, color = "black", vjust=0.6),
            axis.text.y  = element_text(size = 10),
            axis.title.y = element_text(size = 14)) +
            # scale_fill_manual(values=brewer.pal(9,"Paired"))
            # Namespace-qualified so the recipe works without library(RColorBrewer).
            scale_color_manual(values=RColorBrewer::brewer.pal(9,"Paired")[c(3,4,5,6)]) +
            stat_compare_means(method = "anova", label.y = 1.29) + # global
            stat_compare_means(comparisons = my_comparisons, label.y = 1, label = "p.signif") + # paired
            # Zoom rather than set scale limits: scale limits would turn any
            # out-of-range score into NA before the boxes and tests are computed.
            coord_cartesian(ylim = c(-0.52, 1.3))
    p1
    

      

    多图拼接

    # Set the repr.plot.* size options (presumably read by the notebook R
    # kernel), then arrange the four plots in a grid with two columns.
    options(repr.plot.width=8, repr.plot.height=9)
    cowplot::plot_grid(p1,p2,p3,p4,ncol = 2)
    

      

    PDF出图

    # Save a width-8, height-9 PDF. No `plot` argument is passed, so ggsave()
    # falls back to its default, the last plot displayed.
    ggsave(filename = "HhOFF HhON metabolic pathways.pdf", width = 8, height = 9)
    

      

    有些图不能这么保存,比如heatmap,这时就要用到pdf函数

    # Traditional device-based save: open a PDF device, draw, close the device.
    pdf("manuscript/HSCR.cluster.heatmap.pdf", width=8, height=7)
    # Draw explicitly. A bare `p` is only auto-printed at top level; inside
    # source(), a function or a loop it draws nothing and the PDF stays empty.
    print(p)
    dev.off()
    

      

    Inkscape里修改文字【对齐,上下标等等】

    OK, 一个准发表级的图就制作好了,可能还需要精修。


    其余细节

    点的类型

    # Change the border of the points: shape 21 takes a separate outline colour
    # (`colour`) and interior colour (`fill`); `stroke` sets the outline width.
    geom_point(shape = 21, colour = "black", fill = "white", size = 5, stroke = 5)
    

    把点拟合成线

    # LOESS curve through the points (span = 0.2) without a confidence band.
    # FALSE is spelled out because `F` is an ordinary variable that can be
    # reassigned.
    stat_smooth(method = "loess", size = 1.1, se = FALSE, span = 0.2)

    散点图显示mean

    # Overlay the group mean as a large black point. `fun` replaces the
    # deprecated `fun.y` argument (ggplot2 >= 3.3.0). Add to a plot with `+`.
    stat_summary(fun=mean, geom="point", shape=20, size=7, color="black", fill="black")

    图例,比如改legend title,改点大小,去掉图例

    # Axis titles (each string starts with a literal line break), an empty plot
    # title, and "Clinical score" as the title of the colour legend.
    labs(x = "
    Transcriptional level",y = "
    Post-transcriptional level", title = "", color = "Clinical score")
    
    # Change the legend dot size: override the size of the legend keys only.
    guides(colour = guide_legend(override.aes = list(size=10)))
    
    # Remove the legend title.
    theme(legend.title = element_blank())
    
    # Legend text size and legend position given as a pair of coordinates.
    # NOTE(review): newer ggplot2 releases deprecate a numeric
    # `legend.position` in favour of `legend.position.inside` - confirm against
    # the installed version.
    theme(legend.text = element_text(size = 12), legend.position = c(0.8, 0.75))
    # Remove the legend background.
    theme(legend.background=element_blank())

    去掉legend的白色背景布,一步到位

    # All legend tweaks in a single theme() call: no title, size-11 text,
    # position given as a pair of coordinates, and no background.
    theme(legend.title = element_blank(), legend.text = element_text(size = 11), legend.position = c(0.85, 0.15),
              legend.background = element_blank())

    标题格式,比如居中

    # Centre the plot title (hjust = 0.5) and set its size to 18.
    theme(plot.title = element_text(hjust = 0.5, size = 18))
    

      

    去掉边框,轴线,刻度;去掉右上边框

    # Empty canvas: blank the panel border, grid lines and axis lines, set all
    # titles to empty strings, then blank axis titles, tick labels and ticks.
    theme(panel.border = element_blank(), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), axis.line = element_blank()) +
    labs(x = "",y = "", title = "") +
    theme(axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank())
    
    # Remove the top and right border: blank the panel border, grid and
    # background, and draw the axis lines in black instead.
    theme(axis.line = element_line(colour = "black"), panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(), panel.border = element_blank(), panel.background = element_blank())
    

     

    坐标轴,比如修改起点,范围

    # Force the y axis to start at 0: lower limit 0, upper limit left open
    # (NA), and no padding added around the limits (expand = c(0, 0)).
    scale_y_continuous(expand = c(0, 0), limits = c(0, NA))
    

      

    去掉画布中的网格线条

    # Remove only the grid inside the panel (major and minor lines); borders,
    # axes and background are left untouched.
    theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
    

      

    添加文本

    # Add a fixed two-line text label (the string contains a literal line
    # break) at x = 0.5, y = 2, in black with size 6.
    annotate("text", label = "Wilcoxon test
    P-value = 1.48e-12", x = 0.5, y =2, size = 6, colour = "black")
    

    添加背景色

    # Add a background colour to mark a region: an opaque green rectangle with
    # no outline, spanning x 0 to 2.5 and y -2 to -1.
    # NOTE(review): the corners are constants, not aes() mappings, so the
    # layer presumably draws one rectangle per row of the plot data;
    # annotate("rect", ...) draws a single one - verify before using alpha < 1.
    geom_rect(xmin=0, xmax=2.5, ymin=-2, ymax=-1, fill="#4DAF4A", alpha=1, color=NA)
    

      

    修改填充颜色

    # Manual colour scale from the 9-colour "Set1" palette, skipping its 6th
    # colour (indices 1-5 and 7-9 are kept).
    scale_color_manual(values=brewer.pal(9,"Set1")[c(1:5,7:9)])
    

    精准控制圈图的两种alpha,比如venn图

    # Two transparency levels from one palette: outlines use `sample.colors`
    # as they are, fills use the same colours with alpha set to 0.2.
    scale_color_manual(values = sample.colors) +
    scale_fill_manual(values = alpha(sample.colors, .2))
    

      

    快速统计分析

    # Quick statistical test.
    # Two-sided Wilcoxon test comparing the `Time` values of the rows whose
    # GeneSet is "Common risk" with those whose GeneSet is "L-HSCR specific".
    wilcox.test(subset(time.df,GeneSet=="Common risk")$Time,
    subset(time.df,GeneSet=="L-HSCR specific")$Time, alternative = "two.sided")
    

    # packages
    # significance
    # https://github.com/const-ae/ggsignif

    分组计算,如取mean,单列

    # Quick per-group summary: the mean temperature of each city,
    # returned in a column named `mean_temperature`.
    weather %>%
      group_by(city) %>%
      summarise(mean_temperature = mean(temperature))
    

    分组取mean,多列  

    # Toy table: two rate columns recorded for three names over three months.
    d <- read.table(text=
    'Name     Month  Rate1     Rate2
    Aira       1      12        23
    Aira       2      18        73
    Aira       3      19        45
    Ben        1      53        19
    Ben        2      22        87
    Ben        3      19        45
    Cat        1      22        87
    Cat        2      67        43
    Cat        3      45        32', header=TRUE)
    
    # Mean of columns 3 and 4 (Rate1, Rate2) within each Name; the grouping
    # column is named Group.1 because the `by` list is unnamed.
    aggregate(d[, 3:4], list(d$Name), mean)
    
    # Printed result (kept as comments so the snippet stays runnable):
    #   Group.1    Rate1    Rate2
    # 1    Aira 16.33333 47.00000
    # 2     Ben 31.33333 50.33333
    # 3     Cat 44.66667 54.00000
    

      

    小数点保留,科学计数法

    # Format decimals in scientific notation.
    # Base R: "e" format with 1 digit after the decimal point.
    formatC(0.46, format = "e", digits = 1)
    # The same number via the scales package, requesting 2 digits.
    library(scales)
    scientific(0.46, digits = 2)
    

      

    查看默认的颜色 - 画图的结果数据

    # See which colours a plot actually uses: ggplot_build() returns the built
    # plot, and its `data` element holds the computed data of the layers
    # (colour codes are in the `colour` column).
    ggplot_build(p)$data
    

      

    其他图种

    配对的箱线图、柱状图、折线图 - 用于比较case和control

    data

    这里想加点需要用另一个函数geom_dotplot

        lineage	lineage.sub	stage	S.Score	G2M.Score	cc.score
        <chr>	<fct>	<chr>	<dbl>	<dbl>	<dbl>
    ctrl_AAACCTGAGACATAAC	NP	NPlate	Control	-0.8162696	-0.98076576	-0.8162696
    ctrl_AAACCTGCAAGTAATG	BP	BP	Control	0.3118349	-0.05584626	0.3118349
    ctrl_AAACCTGCATGCTAGT	GP	GP	Control	0.4443853	0.27702244	0.4443853
    
    # Paired (case vs control) box plot with every cell overlaid as a dot.
    # http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
    library(ggpubr)
    
    options(repr.plot.width=5, repr.plot.height=4)
    p <- ggplot(cc.df, aes(x=lineage.sub, y=cc.score, fill=stage)) +
      # Outliers are hidden here because the dot layer already draws every point.
      geom_boxplot(position=position_dodge(1), outlier.shape = NA) +
      # Same dodge width as the boxes so each dot stack sits on its own box.
      geom_dotplot(binaxis='y', stackdir='center', position = position_dodge(1), dotsize=0.15, binwidth=1/25, binpositions="all") +
      theme_bw() +
      labs(x = "", y = "Proliferation score
    ", title = "") +
      theme(axis.text.x  = element_text(face="plain", angle=0, size = 14, color = "black", vjust=0.6),
            axis.text.y  = element_text(size = 10),
            axis.title.y = element_text(size = 14)) +
      scale_fill_manual(values=c("blue","red")) +
      # One test per x position, comparing the `stage` groups; label drawn at y = 4.
      stat_compare_means(aes(group = stage), label = "p.signif", label.y = 4) +
      theme(legend.title = element_blank())
    p
    

      

    封装好的函数

    # Bar plot of `len` by `dose` with mean and standard error, coloured and
    # dodged by `supp`; a significance label per dose is drawn at y = 29.
    ggbarplot(ToothGrowth, x = "dose", y = "len", add = "mean_se",
              color = "supp", palette = "jco", 
              position = position_dodge(0.8))+
      stat_compare_means(aes(group = supp), label = "p.signif", label.y = 29)
    # The same comparison as a line plot, with one label height per dose.
    ggline(ToothGrowth, x = "dose", y = "len", add = "mean_se",
              color = "supp", palette = "jco")+
      stat_compare_means(aes(group = supp), label = "p.signif", 
                         label.y = c(16, 25, 29))
    

      

    热图 - 最直观

    # heatmap
    # https://jokergoo.github.io/ComplexHeatmap-reference/book/

    热图骚操作

    聚类热图怎么按自己的意愿调整分支的顺序? 

    平滑热图 - smooth heatmap 

    monocle里面的一种热图,很多顶刊都在用,也确实很漂亮。对应函数:plot_pseudotime_heatmap

    问题是不够灵活,需要用monocle处理后才行,需要自定义一个处理函数。

    小提琴图marker - 分布

    stacked violin plot for visualizing single-cell data in Seurat

    参见:mouse/singleCell/case/Kif7_ENCC/Kif7-integration/integration_public_and_Kif7.ipynb 

    Venn韦恩图/UpSetR图 - 交集

    R绘制韦恩图 | Venn图 | UpSetR图

    Beeswarm Plot 蜂群图 - 序列数据展开

    https://github.com/eclarke/ggbeeswarm

    # Beeswarm demo with a non-default point-placement priority.
    # 140 random draws: 20, 40 and 80 values around the means 1, 2 and 3.
    # Every row gets the same z = 1, so all points form a single swarm.
    dat <- transform(
      data.frame(x = rep(1:3, times = c(20, 40, 80))),
      y = rnorm(length(x), mean = x),
      z = 1
    )
    
    ggplot(data = dat, mapping = aes(x = z, y = y)) +
      geom_beeswarm(cex = 3, priority = "descending", size = 2) +
      ggtitle("Descending") +
      scale_x_continuous(expand = expansion(add = c(0.5, 0.5)))
    

      

    我的代码

    set.seed(49)
    library(ggplot2)
    library(ggbeeswarm)
    
    # Constant x position for the single swarm; pseudotime is X2 with its sign flipped.
    pca_HSCR2$z <- 1
    pca_HSCR2$pseudotime <- -pca_HSCR2$X2
    
    options(repr.plot.width = 6, repr.plot.height = 4)
    # One swarm of samples along pseudotime, coloured by severity; the axes are
    # flipped and theme_void() removes axes, grid and background.
    ggplot(
      data = pca_HSCR2,
      mapping = aes(x = z, y = pseudotime, fill = severity, color = severity)
    ) +
      geom_beeswarm(cex = 1.4, priority = "ascending", size = 1.2) +
      # ggtitle('ascending') + # Descending
      scale_x_continuous(expand = expansion(add = c(0.5, 0.5))) +
      scale_color_manual(values = severity.colors) +
      coord_flip() +
      theme_void()
    

      

    基因模块在pseudotime表达的line图

    参见:mouse/singleCell/case/Kif7_ENCC/Kif7/Kif7_basic_analysis.ipynb

    火山图/对角线图 - 特殊散点图  

    参考:mouse/singleCell/case/Kif7_ENCC/Kif7-integration/Ezh2_analysis.ipynb

    # Prepare data: one row per gene with the two log2 fold-change vectors that
    # are plotted against each other (S-HSCR on x, L-HSCR on y).
    log2FC <- data.frame(gene=rownames(HSCR.DEG.log2FC.df.final), S_log2FC=S.log2FC, L_log2FC=L.log2FC)
    
    # Add a colour label: "none" by default, "red" for the highlighted gene.
    log2FC$color <- "none"
    # Index the column rather than a row subset: `df[rows, ]$color <- "red"`
    # raises an error when no row matches, this form is then simply a no-op.
    log2FC$color[log2FC$gene %in% c("HDAC1")] <- "red"
    
    # The genes to be labelled in the plot.
    label.genes <- c('RAMP2', 'HEY1', 'STAMBP', 'CCNB1IP1', 'LMOD3', 'NUP107', 'HEY2', 'FOXO1', 'CRLF1', 'ZFP36L2', 'NR2F2', 'TUBB3', 
                    'ZNF385A', 'TMEM14C', 'FLNA', 'TFAP2A', 'SOX11', 'HDAC1', 'GLI3', 'BCL11A')
    label.df <- subset(log2FC, gene %in% label.genes)
    
    options(repr.plot.width=4.5, repr.plot.height=5)
    library(ggplot2)
    library("ggrepel")
    # Scatter plot with zero lines on both axes and a dashed y = x diagonal;
    # labelled genes are redrawn larger in red and named with repelled text.
    ggplot(log2FC, aes(x=S_log2FC, y=L_log2FC, color=color)) + # , color=coregene
        geom_hline(yintercept=0) +
        geom_vline(xintercept=0) +
        geom_abline(intercept = 0, slope = 1, color="black", linetype="dashed", size=1) +
        geom_point(size=0.5) +
        geom_point(data = label.df, size=2, color = "red") +
        theme_bw() +
        labs(x = "
    Log2FC in S-HSCR",y = "Log2FC in L-HSCR", title = "") +
        theme(legend.title=element_blank()) +
        # Change fontface. Allowed values : 1(normal), 2(bold), 3(italic), 4(bold.italic)
        geom_text_repel(data=label.df, aes(label = gene), size = 3.5, fontface=3, color="red",
                        box.padding = 0.4, max.overlaps = Inf) +
        theme(legend.position = "none", 
            axis.text  = element_text(size = 10),
            # axis.text.y  = element_text(size = 10),
            axis.title = element_text(size = 16, face="plain")) +
        scale_x_continuous(limits = c(-8, 8)) +
        scale_y_continuous(limits = c(-8, 8)) +
        # Named values bind each colour to its label explicitly, so the mapping
        # stays correct even when only one of the two labels is present.
        scale_color_manual(values=c(none = "grey", red = "red"))
    

      

    点的文本标记

    geom_text_repel,基本用法


    进阶篇 - 风格统一

    为什么顶刊的图那么的赏心悦目?而自己的图拼到一起却那么的不和谐,都被自己丑哭了,却不知从何下手。

    这里有几个教程还不错:



  • 相关阅读:
    使用Xshell为xftp开ssh通道代理
    linux下查找svn的相关目录的命令
    linux服务器A远程连接服务器B的mysql及1045错误
    怎样下载带权限认证的文件?
    Vue项目打包部署总结
    Vue项目打包压缩:让页面更快响应
    axios请求失败自动重发
    可用的后台管理系统
    vue组件间方式总结
    非脚手架创建vue项目,并使用webpack打包
  • 原文地址:https://www.cnblogs.com/leezx/p/14374326.html
Copyright © 2011-2022 走看看