phosphosite/Gene set over-representation analysis — pathwayOverrepresent • PhosR

This function performes phosphosite (or gene) set over-representation analysis using Fisher's exact test.

pathwayOverrepresent(geneSet, annotation, universe, alter = "greater")

Arguments

geneSet: an array of gene or phosphosite IDs (IDs are gene symbols etc that match to your pathway annotation list).
annotation: a list of pathways with each element containing an array of gene or phosphosite IDs.
universe: the universe/backgrond of all genes or phosphosites in your profiled dataset.
alter: test for enrichment ('greater', default), depletion ('less'), or 'two.sided'.

Value

A matrix of pathways and their associated substrates and p-values.

Examples

# \donttest{
library(limma)
library(org.Rn.eg.db)
#> Loading required package: AnnotationDbi
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> 
#> Attaching package: ‘BiocGenerics’
#> The following object is masked from ‘package:limma’:
#> 
#>     plotMA
#> The following objects are masked from ‘package:stats’:
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from ‘package:base’:
#> 
#>     Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
#>     as.data.frame, basename, cbind, colnames, dirname, do.call,
#>     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#>     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#>     pmin.int, rank, rbind, rownames, sapply, setdiff, table, tapply,
#>     union, unique, unsplit, which.max, which.min
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Loading required package: IRanges
#> Loading required package: S4Vectors
#> 
#> Attaching package: ‘S4Vectors’
#> The following object is masked from ‘package:utils’:
#> 
#>     findMatches
#> The following objects are masked from ‘package:base’:
#> 
#>     I, expand.grid, unname
#> 
library(reactome.db)
library(annotate)
#> Loading required package: XML


data('phospho_L6_ratio_pe')
data('SPSs')

ppe <- phospho.L6.ratio.pe
sites = paste(sapply(GeneSymbol(ppe), function(x)x),";",
    sapply(Residue(ppe), function(x)x),
    sapply(Site(ppe), function(x)x),
    ";", sep = "")
grps = gsub("_.+", "", colnames(ppe))
design = model.matrix(~ grps - 1)
ctl = which(sites %in% SPSs)
ppe = RUVphospho(ppe, M = design, k = 3, ctl = ctl)

phosphoL6 = SummarizedExperiment::assay(ppe, "normalised")
                                  
# fit linear model for each phosphosite
f <- grps
X <- model.matrix(~ f - 1)
fit <- lmFit(phosphoL6, X)

# extract top-ranked phosphosites for each condition compared to basal
table.AICAR <- topTable(eBayes(fit), number=Inf, coef = 1)
table.Ins <- topTable(eBayes(fit), number=Inf, coef = 3)
table.AICARIns <- topTable(eBayes(fit), number=Inf, coef = 2)

DE1.RUV <- c(sum(table.AICAR[,'adj.P.Val'] < 0.05),
    sum(table.Ins[,'adj.P.Val'] < 0.05),
    sum(table.AICARIns[,'adj.P.Val'] < 0.05))

# extract top-ranked phosphosites for each group comparison
contrast.matrix1 <- makeContrasts(fAICARIns-fIns, levels=X)
contrast.matrix2 <- makeContrasts(fAICARIns-fAICAR, levels=X)
fit1 <- contrasts.fit(fit, contrast.matrix1)
fit2 <- contrasts.fit(fit, contrast.matrix2)
table.AICARInsVSIns <- topTable(eBayes(fit1), number=Inf)
table.AICARInsVSAICAR <- topTable(eBayes(fit2), number=Inf)

DE2.RUV <- c(sum(table.AICARInsVSIns[,'adj.P.Val'] < 0.05),
    sum(table.AICARInsVSAICAR[,'adj.P.Val'] < 0.05))

o <- rownames(table.AICARInsVSIns)
Tc <- cbind(table.Ins[o,'logFC'], table.AICAR[o,'logFC'],
            table.AICARIns[o,'logFC'])
rownames(Tc) = gsub('(.*)(;[A-Z])([0-9]+)(;)', '\\1;\\3;', o)
colnames(Tc) <- c('Ins', 'AICAR', 'AICAR+Ins')

# summary phosphosite-level information to proteins for performing downstream
# gene-centric analyses.
Tc.gene <- phosCollapse(Tc, id=gsub(';.+', '', rownames(Tc)),
    stat=apply(abs(Tc), 1, max), by = 'max')
geneSet <- names(sort(Tc.gene[,1],
                    decreasing = TRUE))[seq(round(nrow(Tc.gene) * 0.1))]
#lapply(PhosphoSite.rat, function(x){gsub(';[STY]', ';', x)})


# Preparing Reactome annotation for our pathways analysis
pathways = as.list(reactomePATHID2EXTID)

path_names = as.list(reactomePATHID2NAME)
name_id = match(names(pathways), names(path_names))
names(pathways) = unlist(path_names)[name_id]

pathways = pathways[which(grepl("Rattus norvegicus", names(pathways), 
    ignore.case = TRUE))]

pathways = lapply(pathways, function(path) {
    gene_name = unname(getSYMBOL(path, data = "org.Rn.eg"))
    toupper(unique(gene_name))
})


# 1D gene-centric pathway analysis
path1 <- pathwayOverrepresent(geneSet, annotation=pathways,
    universe = rownames(Tc.gene), alter = 'greater')
# }