zoukankan      html  css  js  c++  java
  • RNA速率scVelo

    import scvelo as scv
    
    adata = scv.read("E:/data_exercise/day1_27/scV/data/Pancreas/adata_dynamics.h5ad", cache=True)
    
    adata
    
    AnnData object with n_obs × n_vars = 3696 × 2000
        obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts'
        var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling'
        uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics'
        obsm: 'X_pca', 'X_umap'
        varm: 'loss'
        layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced'
        obsp: 'connectivities', 'distances'
    
    # 计算速度矢量
    scv.tl.velocity(adata, mode='dynamical')
    scv.tl.velocity_graph(adata)
    
    computing velocities
        finished (0:00:06) --> added 
        'velocity', velocity vectors for each individual cell (adata.layers)
    computing velocity graph
        finished (0:00:11) --> added 
        'velocity_graph', sparse matrix with cosine correlations (adata.uns)
    
    #汇出嵌入模型
    scv.pl.velocity_embedding_grid(adata, basis='umap')
    
    computing velocity embedding
        finished (0:00:01) --> added
        'velocity_umap', embedded velocity vectors (adata.obsm)
    

    #汇出嵌入模型
    scv.pl.velocity_embedding_stream(adata, basis='umap')
    

    #计算cell cycle score并可视化
    scv.tl.score_genes_cell_cycle(adata)
    
    scv.pl.scatter(adata, color_gradients=['S_score', 'G2M_score'], basis="umap", smooth=True, perc=[5, 95])
    
    calculating cell cycle phase
    -->     'S_score' and 'G2M_score', scores of cell cycle phases (adata.obs)
    

    #3.5 计算细胞状态并可视化
    
    scv.tl.terminal_states(adata)
    
    scv.pl.scatter(adata, color=[ 'root_cells', 'end_points'], basis='umap') 
    #'root_cells', root cells of Markov diffusion process (adata.obs)
    #'end_points', end points of Markov diffusion process (adata.obs)
    
    
    computing terminal states
    WARNING: Uncertain or fuzzy root cell identification. Please verify.
        identified 2 regions of root cells and 1 region of end points .
        finished (0:00:00) --> added
        'root_cells', root cells of Markov diffusion process (adata.obs)
        'end_points', end points of Markov diffusion process (adata.obs)
    

    adata
    
    AnnData object with n_obs × n_vars = 3696 × 2000
        obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points'
        var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
        uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
        obsm: 'X_pca', 'X_umap', 'velocity_umap'
        varm: 'loss'
        layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
        obsp: 'connectivities', 'distances'
    
    #计算pseudotime,可视化,与导出
    
    scv.tl.velocity_pseudotime(adata)
    
    scv.pl.scatter(adata=adata, color='velocity_pseudotime', cmap='gnuplot', basis="umap")
    
    adata.obs["velocity_pseudotime"].to_csv('velocyto_pseudotime.csv')
    

    #查看特定基因的velocity
    
    var_names = ['Snhg6', 'Sbspon', 'Eif2s3y', 'Sntg1'] #adata.var 下的index就是基因名
    
    scv.pl.velocity(adata, var_names=var_names, colorbar=True, ncols=2)
    
    

    adata
    
    AnnData object with n_obs × n_vars = 3696 × 2000
        obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points', 'velocity_pseudotime'
        var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
        uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
        obsm: 'X_pca', 'X_umap', 'velocity_umap'
        varm: 'loss'
        layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
        obsp: 'connectivities', 'distances'
    
    df = adata.var
    df
    
    highly_variable_genes gene_count_corr means dispersions dispersions_norm highly_variable fit_r2 fit_alpha fit_beta fit_gamma ... fit_std_s fit_likelihood fit_u0 fit_s0 fit_pval_steady fit_steady_u fit_steady_s fit_variance fit_alignment_scaling velocity_genes
    index
    Sntg1 True NaN 0.005065 0.131135 0.214469 True 0.401981 0.015726 0.005592 0.088792 ... 0.030838 0.406523 0.0 0.0 0.159472 2.470675 0.094304 0.149138 5.355590 False
    Snhg6 False NaN 0.375835 0.158585 0.040765 True 0.125072 0.389126 2.981982 0.260322 ... 0.245248 0.243441 0.0 0.0 0.403409 0.106128 0.596630 0.762252 2.037296 True
    Ncoa2 False NaN 0.106945 0.145879 0.298358 True -2.313923 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN False
    Sbspon True NaN 0.143032 0.277016 1.044508 True 0.624803 0.464865 2.437113 0.379645 ... 0.178859 0.252135 0.0 0.0 0.182088 0.164805 0.430623 0.674312 1.193015 True
    Ube2w False NaN 0.018522 0.109459 0.091136 True -5.324582 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN False
    ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
    Tmem27 True NaN 1.297619 2.475960 3.254982 True 0.527855 3.954723 21.314334 0.253763 ... 5.014219 0.292956 0.0 0.0 0.496411 0.168456 12.425889 0.389388 3.163113 False
    Uty False NaN 0.018673 0.182256 0.505338 True -0.555835 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN False
    Ddx3y True NaN 0.165256 0.350978 1.465338 True -2.538731 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN False
    Eif2s3y True NaN 0.182643 0.318686 1.281603 True 0.352190 0.193458 0.746410 0.295085 ... 0.158051 0.262140 0.0 0.0 0.390710 0.219716 0.424033 0.748614 2.312621 True
    Erdr1 True NaN 0.344720 0.215833 0.288351 True -0.193263 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN False

    2000 rows × 23 columns

    df.to_csv("adata.var.csv",index=True, header=True )
    
    # 数据框筛选那些基因
    df = df[(df['fit_likelihood'] > .1) & df['velocity_genes'] == True]
    
    kwargs = dict(xscale='log', fontsize=16)
    
    df
    
    highly_variable_genes gene_count_corr means dispersions dispersions_norm highly_variable fit_r2 fit_alpha fit_beta fit_gamma ... fit_std_s fit_likelihood fit_u0 fit_s0 fit_pval_steady fit_steady_u fit_steady_s fit_variance fit_alignment_scaling velocity_genes
    index
    Snhg6 False NaN 0.375835 0.158585 0.040765 True 0.125072 0.389126 2.981982 0.260322 ... 0.245248 0.243441 0.0 0.0 0.403409 0.106128 0.596630 0.762252 2.037296 True
    Sbspon True NaN 0.143032 0.277016 1.044508 True 0.624803 0.464865 2.437113 0.379645 ... 0.178859 0.252135 0.0 0.0 0.182088 0.164805 0.430623 0.674312 1.193015 True
    Fam135a True NaN 0.257955 0.171546 0.096820 True 0.384662 0.172335 0.118088 0.204538 ... 0.149868 0.283343 0.0 0.0 0.387921 1.345830 0.393197 0.671096 3.390194 True
    Tbc1d8 True NaN 0.070612 0.123167 0.169130 True 0.613194 0.083656 0.118139 0.227401 ... 0.081595 0.246594 0.0 0.0 0.392402 0.659434 0.240100 0.829478 2.914215 True
    Fhl2 True NaN 0.232482 0.818012 2.892669 True 0.325223 0.224027 0.330934 0.121456 ... 0.317963 0.262849 0.0 0.0 0.476793 0.539878 1.073687 0.695997 4.785407 True
    ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
    Map3k15 True NaN 0.180941 0.427601 1.901315 True 0.437069 0.252467 0.348783 0.275921 ... 0.225956 0.289070 0.0 0.0 0.434454 0.611380 0.560048 0.510572 2.208756 True
    Rai2 True NaN 0.231965 0.651755 2.173640 True 0.769033 0.792172 2.885604 0.647345 ... 0.276047 0.271263 0.0 0.0 0.311969 0.243741 0.716996 0.707595 1.063908 True
    Rbbp7 False NaN 1.017536 0.422314 0.153713 True 0.344213 1.446806 5.332362 0.326539 ... 0.796754 0.281498 0.0 0.0 0.477365 0.208398 2.985787 0.704522 2.100311 True
    Ap1s2 True NaN 0.127856 0.319473 1.286081 True 0.716659 0.202643 0.596662 0.392769 ... 0.140455 0.286893 0.0 0.0 0.140955 0.315635 0.366964 0.671211 1.775751 True
    Eif2s3y True NaN 0.182643 0.318686 1.281603 True 0.352190 0.193458 0.746410 0.295085 ... 0.158051 0.262140 0.0 0.0 0.390710 0.219716 0.424033 0.748614 2.312621 True

    1006 rows × 23 columns

    #RNA转录,剪接和降解的速率。
    with scv.GridSpec(ncols=3) as pl:
        pl.hist(df['fit_alpha'], xlabel='transcription rate', **kwargs)
        pl.hist(df['fit_beta'] * df['fit_scaling'], xlabel='splicing rate', xticks=[.1, .4, 1], **kwargs)
        pl.hist(df['fit_gamma'], xlabel='degradation rate', xticks=[.1, .4, 1], **kwargs)
    

    adata
    
    AnnData object with n_obs × n_vars = 3696 × 2000
        obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points', 'velocity_pseudotime'
        var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
        uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
        obsm: 'X_pca', 'X_umap', 'velocity_umap'
        varm: 'loss'
        layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
        obsp: 'connectivities', 'distances'
    
    #latent_time
    scv.tl.latent_time(adata)
    
    
    computing latent time using root_cells as prior
        finished (0:00:02) --> added 
        'latent_time', shared time (adata.obs)
    
    scv.pl.scatter(adata, color='latent_time', color_map='gnuplot', size=80)
    

    top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index[:300]
    top_genes
    
    Index(['Pcsk2', 'Dcdc2a', 'Ank', 'Gng12', 'Top2a', 'Pak3', 'Tmem163', 'Nfib',
           'Pim2', 'Smoc1',
           ...
           'Cyr61', 'Ptprn2', 'Rpa2', 'Nit2', 'Cdc20', 'Hmmr', 'Ankrd12', 'Itpr1',
           'Auts2', 'Mdfi'],
          dtype='object', name='index', length=300)
    
    scv.pl.heatmap(adata, var_names=top_genes, sortby='latent_time', col_color='clusters', n_convolve=100)
    
    

    top = df['fit_likelihood'].sort_values(ascending=False).index[:300]
    top
    
    Index(['Pcsk2', 'Dcdc2a', 'Ank', 'Gng12', 'Top2a', 'Pak3', 'Tmem163', 'Nfib',
           'Pim2', 'Smoc1',
           ...
           'Trp53inp2', 'Ccnb1', 'Atp2a2', 'Dtl', 'Kmt2a', 'Emb', 'Rcan2',
           'Gm28172', 'Rab3b', 'Gk'],
          dtype='object', name='index', length=300)
    
    scv.pl.heatmap(adata, var_names=top, sortby='latent_time', col_color='clusters', n_convolve=100)
    

    # 驱动基因
    top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index
    scv.pl.scatter(adata, basis=top_genes[:15], ncols=5, frameon=False)
    

    var_names = ['Actn4', 'Ppp3ca', 'Cpe', 'Nnat']
    scv.pl.scatter(adata, var_names, frameon=False)
    scv.pl.scatter(adata, x='latent_time', y=var_names, frameon=False)
    

    #特定于簇的最高似然基因
    scv.tl.rank_dynamical_genes(adata, groupby='clusters')
    df = scv.get_df(adata, 'rank_dynamical_genes/names')
    df.head(5)
    
    ranking genes by cluster-specific likelihoods
        finished (0:00:05) --> added 
        'rank_dynamical_genes', sorted scores by group ids (adata.uns)
    
    Ductal Ngn3 low EP Ngn3 high EP Pre-endocrine Beta Alpha Delta Epsilon
    0 Dcdc2a Dcdc2a Rbfox3 Abcc8 Pcsk2 Cpe Pcsk2 Tox3
    1 Top2a Adk Mapre3 Tmem163 Ank Gnao1 Rap1b Rnf130
    2 Nfib Mki67 Btbd17 Gnao1 Tmem163 Pak3 Pak3 Meis2
    3 Wfdc15b Rap1gap2 Sulf2 Ank Tspan7 Pim2 Abcc8 Adk
    4 Cdk1 Top2a Tcp11 Tspan7 Map1b Map1b Slc7a14 Rap1gap2
    for cluster in ['Ductal', 'Ngn3 high EP', 'Pre-endocrine', 'Beta']:
        scv.pl.scatter(adata, df[cluster][:5], ylabel=cluster, frameon=False)
    

    df = scv.get_df(adata, 'rank_dynamical_genes/scores')
    df.head(5)
    
    Ductal Ngn3 low EP Ngn3 high EP Pre-endocrine Beta Alpha Delta Epsilon
    0 0.59 0.59 0.53 0.73 0.90 0.61 0.96 0.61
    1 0.56 0.53 0.50 0.54 0.70 0.57 0.70 0.61
    2 0.56 0.51 0.47 0.53 0.59 0.54 0.67 0.59
    3 0.50 0.50 0.46 0.53 0.59 0.54 0.62 0.57
    4 0.49 0.49 0.45 0.51 0.57 0.51 0.60 0.55
    [参考链接1](https://scvelo.readthedocs.io/DynamicalModeling.html)
    [参考链接2](https://www.bilibili.com/read/cv7764833/)
    
  • 相关阅读:
    【Linux】5.5 Shell运算符
    【Linux】5.4 Shell数组
    【Linux】5.3 Shell字符串
    【Linux】5.2 Shell变量
    【Linux】5.1 Shell简介
    【Linux】3.11 包管理工具(RPM和YUM)
    【Linux】3.10 进程管理(重点)
    【Linux】3.9 网络配置
    【Linux】3.8 Linux磁盘分区、挂载
    【Linux】3.7 定时任务调度
  • 原文地址:https://www.cnblogs.com/wailaifeike/p/14512842.html
Copyright © 2011-2022 走看看