R/registration_stats_enrichment.R
registration_stats_enrichment.Rd
This function computes the gene enrichment t-statistics (one group > the
rest). These t-statistics are the ones typically used for spatial
registration with layer_stat_cor()
and related functions.
registration_stats_enrichment(
sce_pseudo,
block_cor,
covars = NULL,
var_registration = "registration_variable",
var_sample_id = "registration_sample_id",
gene_ensembl = NULL,
gene_name = NULL
)
The output of registration_pseudobulk()
.
A numeric(1)
computed with registration_block_cor()
.
A character()
with names of sample-level covariates.
A character(1)
specifying the colData(sce_pseudo)
variable of interest that will be used for computing the relevant
statistics.
A character(1)
specifying the colData(sce_pseudo)
variable with the sample ID.
A character(1)
specifying the rowData(sce_pseudo)
column with the ENSEMBL gene IDs. This will be used by layer_stat_cor()
.
A character(1)
specifying the rowData(sce_pseudo)
column with the gene names (symbols).
A data.frame()
with the enrichment statistical results. This is
similar to fetch_data("modeling_results")$enrichment
.
Other spatial registration and statistical modeling functions:
registration_block_cor()
,
registration_model()
,
registration_pseudobulk()
,
registration_stats_anova()
,
registration_stats_pairwise()
,
registration_wrapper()
example("registration_block_cor", package = "spatialLIBD")
#>
#> rgst__> example("registration_model", package = "spatialLIBD")
#>
#> rgstr_> example("registration_pseudobulk", package = "spatialLIBD")
#>
#> rgstr_> ## Ensure reproducibility of example data
#> rgstr_> set.seed(20220907)
#>
#> rgstr_> ## Generate example data
#> rgstr_> sce <- scuttle::mockSCE()
#>
#> rgstr_> ## Add some sample IDs
#> rgstr_> sce$sample_id <- sample(LETTERS[1:5], ncol(sce), replace = TRUE)
#>
#> rgstr_> ## Add a sample-level covariate: age
#> rgstr_> ages <- rnorm(5, mean = 20, sd = 4)
#>
#> rgstr_> names(ages) <- LETTERS[1:5]
#>
#> rgstr_> sce$age <- ages[sce$sample_id]
#>
#> rgstr_> ## Add gene-level information
#> rgstr_> rowData(sce)$ensembl <- paste0("ENSG", seq_len(nrow(sce)))
#>
#> rgstr_> rowData(sce)$gene_name <- paste0("gene", seq_len(nrow(sce)))
#>
#> rgstr_> ## Pseudo-bulk
#> rgstr_> sce_pseudo <- registration_pseudobulk(sce, "Cell_Cycle", "sample_id", c("age"), min_ncells = NULL)
#> 2024-07-26 23:49:09.351497 make pseudobulk object
#> 2024-07-26 23:49:09.508425 drop lowly expressed genes
#> 2024-07-26 23:49:09.563072 normalize expression
#>
#> rgstr_> colData(sce_pseudo)
#> DataFrame with 20 rows and 8 columns
#> Mutation_Status Cell_Cycle Treatment sample_id age
#> <character> <character> <character> <character> <numeric>
#> A_G0 NA G0 NA A 19.1872
#> B_G0 NA G0 NA B 25.3496
#> C_G0 NA G0 NA C 24.1802
#> D_G0 NA G0 NA D 15.5211
#> E_G0 NA G0 NA E 20.9701
#> ... ... ... ... ... ...
#> A_S NA S NA A 19.1872
#> B_S NA S NA B 25.3496
#> C_S NA S NA C 24.1802
#> D_S NA S NA D 15.5211
#> E_S NA S NA E 20.9701
#> registration_variable registration_sample_id ncells
#> <character> <character> <integer>
#> A_G0 G0 A 8
#> B_G0 G0 B 13
#> C_G0 G0 C 9
#> D_G0 G0 D 7
#> E_G0 G0 E 10
#> ... ... ... ...
#> A_S S A 12
#> B_S S B 8
#> C_S S C 7
#> D_S S D 14
#> E_S S E 11
#>
#> rgstr_> registration_mod <- registration_model(sce_pseudo, "age")
#> 2024-07-26 23:49:09.635598 create model matrix
#>
#> rgstr_> head(registration_mod)
#> registration_variableG0 registration_variableG1 registration_variableG2M
#> A_G0 1 0 0
#> B_G0 1 0 0
#> C_G0 1 0 0
#> D_G0 1 0 0
#> E_G0 1 0 0
#> A_G1 0 1 0
#> registration_variableS age
#> A_G0 0 19.18719
#> B_G0 0 25.34965
#> C_G0 0 24.18019
#> D_G0 0 15.52107
#> E_G0 0 20.97006
#> A_G1 0 19.18719
#>
#> rgst__> block_cor <- registration_block_cor(sce_pseudo, registration_mod)
#> 2024-07-26 23:49:09.646796 run duplicateCorrelation()
#> 2024-07-26 23:49:10.950542 The estimated correlation is: -0.0187869166526901
results_enrichment <- registration_stats_enrichment(sce_pseudo,
block_cor, "age",
gene_ensembl = "ensembl", gene_name = "gene_name"
)
#> 2024-07-26 23:49:10.95432 computing enrichment statistics
#> 2024-07-26 23:49:11.070725 extract and reformat enrichment results
head(results_enrichment)
#> t_stat_G0 t_stat_G1 t_stat_G2M t_stat_S p_value_G0 p_value_G1
#> Gene_0001 0.1482017 0.5610669 -0.3612235 -0.3458508 0.88374480 0.58130631
#> Gene_0002 1.1913621 -0.4218015 0.1861521 -0.9362781 0.24817889 0.67790173
#> Gene_0003 0.3911563 -0.1708744 -1.1308523 0.8936891 0.70003266 0.86612822
#> Gene_0004 -0.2261922 0.7745193 -0.3413959 -0.1966017 0.82346696 0.44815103
#> Gene_0005 -2.8506769 0.0763176 1.3501762 1.0489289 0.01022576 0.93996395
#> Gene_0006 0.6567980 -2.0933725 1.9447842 -0.5422089 0.51918624 0.04995892
#> p_value_G2M p_value_S fdr_G0 fdr_G1 fdr_G2M fdr_S
#> Gene_0001 0.7219173 0.7332538 0.9877448 0.9964804 0.9852245 0.9874654
#> Gene_0002 0.8542983 0.3608759 0.9610612 0.9964804 0.9852245 0.9419020
#> Gene_0003 0.2721883 0.3826660 0.9875616 0.9964804 0.9380488 0.9419020
#> Gene_0004 0.7365517 0.8462264 0.9875616 0.9964804 0.9852245 0.9874654
#> Gene_0005 0.1928228 0.3073674 0.8705766 0.9964804 0.9327794 0.9354977
#> Gene_0006 0.0667520 0.5939759 0.9715681 0.9964804 0.9059695 0.9745520
#> logFC_G0 logFC_G1 logFC_G2M logFC_S ensembl gene
#> Gene_0001 0.0714226 0.26841193 -0.1735825 -0.1662520 ENSG1 gene1
#> Gene_0002 1.1391547 -0.41614757 0.1843559 -0.9073630 ENSG2 gene2
#> Gene_0003 0.0746434 -0.03279529 -0.2096408 0.1677927 ENSG3 gene3
#> Gene_0004 -0.1509837 0.50974778 -0.2274905 -0.1312736 ENSG4 gene4
#> Gene_0005 -2.2969018 0.07345877 1.2416624 0.9817806 ENSG5 gene5
#> Gene_0006 0.3707513 -1.07744072 1.0138450 -0.3071555 ENSG6 gene6
## Specifying `block_cor = NaN` ignores the correlation structure
results_enrichment_nan <- registration_stats_enrichment(sce_pseudo,
block_cor = NaN, "age",
gene_ensembl = "ensembl", gene_name = "gene_name"
)
#> 2024-07-26 23:49:11.10365 computing enrichment statistics
#> 2024-07-26 23:49:11.206738 extract and reformat enrichment results
head(results_enrichment_nan)
#> t_stat_G0 t_stat_G1 t_stat_G2M t_stat_S p_value_G0 p_value_G1
#> Gene_0001 0.1497747 0.56711100 -0.3650801 -0.3495404 0.88252049 0.57727506
#> Gene_0002 1.2045227 -0.42617266 0.1880666 -0.9463404 0.24317510 0.67476888
#> Gene_0003 0.3947677 -0.17243577 -1.1419417 0.9022243 0.69740991 0.86491760
#> Gene_0004 -0.2301639 0.78851532 -0.3474114 -0.2000511 0.82042384 0.44011889
#> Gene_0005 -2.8587641 0.07646923 1.3531129 1.0511322 0.01004623 0.93984490
#> Gene_0006 0.6606524 -2.10817213 1.9581833 -0.5453678 0.51676445 0.04851534
#> p_value_G2M p_value_S fdr_G0 fdr_G1 fdr_G2M fdr_S
#> Gene_0001 0.71908334 0.7305269 0.9869999 0.9961051 0.9838800 0.9869894
#> Gene_0002 0.85281819 0.3558526 0.9512827 0.9961051 0.9838800 0.9361872
#> Gene_0003 0.26766387 0.3782304 0.9863868 0.9961051 0.9269203 0.9361872
#> Gene_0004 0.73210050 0.8435656 0.9863868 0.9961051 0.9838800 0.9869894
#> Gene_0005 0.19189934 0.3063803 0.8538025 0.9961051 0.9167840 0.9269610
#> Gene_0006 0.06505398 0.5918443 0.9659963 0.9961051 0.8763496 0.9719602
#> logFC_G0 logFC_G1 logFC_G2M logFC_S ensembl gene
#> Gene_0001 0.0714226 0.26841193 -0.1735825 -0.1662520 ENSG1 gene1
#> Gene_0002 1.1391547 -0.41614757 0.1843559 -0.9073630 ENSG2 gene2
#> Gene_0003 0.0746434 -0.03279529 -0.2096408 0.1677927 ENSG3 gene3
#> Gene_0004 -0.1509837 0.50974778 -0.2274905 -0.1312736 ENSG4 gene4
#> Gene_0005 -2.2969018 0.07345877 1.2416624 0.9817806 ENSG5 gene5
#> Gene_0006 0.3707513 -1.07744072 1.0138450 -0.3071555 ENSG6 gene6