site_gene_analysis.RmdWhile 1, 2, and 3D pathway analyses are useful for data generated from experiments with different treatment/conditions, analysis designed for time-course data may be better suited to analysis experiments that profile multiple time points.
Here, we will apply ClueR which is an R package specifically designed for time-course proteomic and phosphoproteomic data analysis Yang et al. 2015.
We will load the PhosR package with few other packages we will use for this tutorial.
suppressPackageStartupMessages({
  library(parallel)
  library(ggplot2)
  library(ClueR)
  library(reactome.db)
  library(org.Mm.eg.db)
  library(annotate)
  library(PhosR)
})We will load a dataset integrated from two time-course datasets of early and intermediate insulin signalling in mouse liver upon insulin stimulation to demonstrate the time-course phosphoproteomic data analyses.
data("phospho.liver.Ins.TC.ratio.RUV.pe")
ppe <- phospho.liver.Ins.TC.ratio.RUV.pe
ppe
#> class: PhosphoExperiment 
#> dim: 800 90 
#> metadata(0):
#> assays(1): Quantification
#> rownames(800): LARP7;256; SRSF10;131; ... SIK3;493; GSK3A;21;
#> rowData names(0):
#> colnames(90): Intensity.Liver_Ins_0s_Bio7 Intensity.Liver_Ins_0s_Bio8
#>   ... Intensity.Liver_Ins_10m_Bio5 Intensity.Liver_Ins_10m_Bio6
#> colData names(0):Let us start with gene-centric analysis. Such analysis can be directly applied to proteomics data. It can also be applied to phosphoproteomic data by using the phosCollapse function to summarise phosphosite information to proteins.
# take grouping information
grps <- sapply(strsplit(colnames(ppe), "_"), 
            function(x)x[3])
# select differentially phosphorylated sites
sites.p <- matANOVA(ppe@assays@data$Quantification, 
                    grps)
ppm <- meanAbundance(ppe@assays@data$Quantification, grps)
sel <- which((sites.p < 0.05) & (rowSums(abs(ppm) > 1) != 0))
ppm_filtered <- ppm[sel,]
# summarise phosphosites information into gene level
ppm_gene <- phosCollapse(ppm_filtered, 
            gsub(";.+", "", rownames(ppm_filtered)), 
                stat = apply(abs(ppm_filtered), 1, max), by = "max")
# perform ClueR to identify optimal number of clusters
pathways = as.list(reactomePATHID2EXTID)
pathways = pathways[which(grepl("R-MMU", names(pathways), ignore.case = TRUE))]
pathways = lapply(pathways, function(path) {
    gene_name = unname(getSYMBOL(path, data = "org.Mm.eg"))
    toupper(unique(gene_name))
})
RNGkind("L'Ecuyer-CMRG")
set.seed(123)
c1 <- runClue(ppm_gene, annotation=pathways, 
            kRange = seq(2,10), rep = 5, effectiveSize = c(5, 100), 
            pvalueCutoff = 0.05, alpha = 0.5)
# Visualise the evaluation results
data <- data.frame(Success=as.numeric(c1$evlMat), Freq=rep(seq(2,10), each=5))
myplot <- ggplot(data, aes(x=Freq, y=Success)) + 
    geom_boxplot(aes(x = factor(Freq), fill="gray")) +
    stat_smooth(method="loess", colour="red", size=3, span = 0.5) +
    xlab("# of cluster") + 
    ylab("Enrichment score") + 
    theme_classic()
myplot
set.seed(123)
best <- clustOptimal(c1, rep=5, mfrow=c(2, 3), visualize = TRUE)
Phosphosite-centric analyses will perform using kinase-substrate annotation information from PhosphoSitePlus.
data("PhosphoSitePlus")
RNGkind("L'Ecuyer-CMRG")
set.seed(1)
PhosphoSite.mouse2 = mapply(function(kinase) {
  gsub("(.*)(;[A-Z])([0-9]+;)", "\\1;\\3", kinase)
}, PhosphoSite.mouse)
# perform ClueR to identify optimal number of clusters
c3 <- runClue(ppm_filtered, annotation=PhosphoSite.mouse2, kRange = 2:10, rep = 5, effectiveSize = c(5, 100), pvalueCutoff = 0.05, alpha = 0.5)
# Visualise the evaluation results
data <- data.frame(Success=as.numeric(c3$evlMat), Freq=rep(2:10, each=5))
myplot <- ggplot(data, aes(x=Freq, y=Success)) + geom_boxplot(aes(x = factor(Freq), fill="gray"))+
  stat_smooth(method="loess", colour="red", size=3, span = 0.5) + xlab("# of cluster")+ ylab("Enrichment score")+theme_classic()
myplot
set.seed(1)
best <- clustOptimal(c3, rep=10, mfrow=c(2, 3), visualize = TRUE)
# Finding enriched pathways from each cluster
best$enrichList
#>           size
#> [1,] "PRKACA" "0.000184676866298047" "5" 
#>      substrates                                                
#> [1,] "NR1H3;196;|MARCKS;163;|PRKACA;339;|ITPR1;1755;|SIK3;493;"
#> 
#> $`cluster 3`
#>      kinase         pvalue                 size
#> [1,] "Humphrey.Akt" "0.000162969329853963" "5" 
#> [2,] "Yang.Akt"     "0.000165386907010959" "6" 
#>      substrates                                                         
#> [1,] "TSC2;939;|PFKFB2;486;|FOXO3;252;|FOXO1;316;|GSK3A;21;"            
#> [2,] "AKT1S1;247;|TSC2;939;|PFKFB2;486;|FOXO3;252;|FOXO1;316;|GSK3A;21;"
sessionInfo()
#> R version 4.4.1 (2024-06-14)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS Sonoma 14.7
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
#> 
#> Random number generation:
#>  RNG:     L'Ecuyer-CMRG 
#>  Normal:  Inversion 
#>  Sample:  Rejection 
#>  
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> time zone: UTC
#> tzcode source: internal
#> 
#> attached base packages:
#> [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
#> [8] methods   base     
#> 
#> other attached packages:
#>  [1] PhosR_1.13.1         annotate_1.80.0      XML_3.99-0.17       
#>  [4] org.Mm.eg.db_3.18.0  reactome.db_1.86.2   AnnotationDbi_1.64.1
#>  [7] IRanges_2.36.0       S4Vectors_0.40.2     Biobase_2.62.0      
#> [10] BiocGenerics_0.48.1  ClueR_1.4.2          ggplot2_3.5.1       
#> 
#> loaded via a namespace (and not attached):
#>   [1] DBI_1.2.3                   bitops_1.0-9               
#>   [3] gridExtra_2.3               rlang_1.1.4                
#>   [5] magrittr_2.0.3              matrixStats_1.4.1          
#>   [7] e1071_1.7-16                compiler_4.4.1             
#>   [9] RSQLite_2.3.7               mgcv_1.9-1                 
#>  [11] reshape2_1.4.4              png_0.1-8                  
#>  [13] systemfonts_1.1.0           vctrs_0.6.5                
#>  [15] stringr_1.5.1               pkgconfig_2.0.3            
#>  [17] shape_1.4.6.1               crayon_1.5.3               
#>  [19] fastmap_1.2.0               backports_1.5.0            
#>  [21] XVector_0.42.0              labeling_0.4.3             
#>  [23] utf8_1.2.4                  rmarkdown_2.28             
#>  [25] preprocessCore_1.64.0       ragg_1.3.3                 
#>  [27] network_1.18.2              purrr_1.0.2                
#>  [29] bit_4.5.0                   xfun_0.48                  
#>  [31] zlibbioc_1.48.2             cachem_1.1.0               
#>  [33] GenomeInfoDb_1.38.8         jsonlite_1.8.9             
#>  [35] blob_1.2.4                  highr_0.11                 
#>  [37] DelayedArray_0.28.0         broom_1.0.7                
#>  [39] R6_2.5.1                    stringi_1.8.4              
#>  [41] bslib_0.8.0                 RColorBrewer_1.1-3         
#>  [43] limma_3.58.1                GGally_2.2.1               
#>  [45] car_3.1-3                   GenomicRanges_1.54.1       
#>  [47] jquerylib_0.1.4             Rcpp_1.0.13                
#>  [49] SummarizedExperiment_1.32.0 knitr_1.48                 
#>  [51] splines_4.4.1               igraph_2.1.1               
#>  [53] Matrix_1.7-0                tidyselect_1.2.1           
#>  [55] abind_1.4-8                 yaml_2.3.10                
#>  [57] viridis_0.6.5               lattice_0.22-6             
#>  [59] tibble_3.2.1                plyr_1.8.9                 
#>  [61] withr_3.0.2                 KEGGREST_1.42.0            
#>  [63] coda_0.19-4.1               evaluate_1.0.1             
#>  [65] desc_1.4.3                  ggstats_0.7.0              
#>  [67] proxy_0.4-27                circlize_0.4.16            
#>  [69] Biostrings_2.70.3           pillar_1.9.0               
#>  [71] BiocManager_1.30.25         ggpubr_0.6.0               
#>  [73] carData_3.0-5               MatrixGenerics_1.14.0      
#>  [75] generics_0.1.3              RCurl_1.98-1.16            
#>  [77] munsell_0.5.1               scales_1.3.0               
#>  [79] BiocStyle_2.30.0            xtable_1.8-4               
#>  [81] class_7.3-22                glue_1.8.0                 
#>  [83] pheatmap_1.0.12             tools_4.4.1                
#>  [85] dendextend_1.18.1           ggsignif_0.6.4             
#>  [87] fs_1.6.4                    grid_4.4.1                 
#>  [89] tidyr_1.3.1                 colorspace_2.1-1           
#>  [91] nlme_3.1-164                GenomeInfoDbData_1.2.11    
#>  [93] Formula_1.2-5               cli_3.6.3                  
#>  [95] ruv_0.9.7.1                 textshaping_0.4.0          
#>  [97] fansi_1.0.6                 S4Arrays_1.2.1             
#>  [99] viridisLite_0.4.2           ggdendro_0.2.0             
#> [101] dplyr_1.1.4                 pcaMethods_1.94.0          
#> [103] gtable_0.3.6                rstatix_0.7.2              
#> [105] sass_0.4.9                  digest_0.6.37              
#> [107] SparseArray_1.2.4           farver_2.1.2               
#> [109] htmlwidgets_1.6.4           memoise_2.0.1              
#> [111] htmltools_0.5.8.1           pkgdown_2.1.1              
#> [113] lifecycle_1.0.4             httr_1.4.7                 
#> [115] statnet.common_4.10.0       statmod_1.5.0              
#> [117] GlobalOptions_0.1.2         bit64_4.5.2                
#> [119] MASS_7.3-60.2