This function computes the gene enrichment t-statistics (one group > the rest). These t-statistics are the ones typically used for spatial registration with layer_stat_cor() and related functions.

registration_stats_enrichment(
  sce_pseudo,
  block_cor,
  covars = NULL,
  var_registration = "registration_variable",
  var_sample_id = "registration_sample_id",
  gene_ensembl = NULL,
  gene_name = NULL
)

Arguments

sce_pseudo

The output of registration_pseudobulk().

block_cor

A numeric(1) computed with registration_block_cor().

covars

A character() with names of sample-level covariates.

var_registration

A character(1) specifying the colData(sce_pseudo) variable of interest, which will be used for computing the relevant statistics.

var_sample_id

A character(1) specifying the colData(sce_pseudo) variable with the sample ID.

gene_ensembl

A character(1) specifying the rowData(sce_pseudo) column with the ENSEMBL gene IDs. This will be used by layer_stat_cor().

gene_name

A character(1) specifying the rowData(sce_pseudo) column with the gene names (symbols).

Value

A data.frame() with the enrichment statistical results. This is similar to fetch_data("modeling_results")$enrichment.

See also

Examples

example("registration_block_cor", package = "spatialLIBD")
#> 
#> rgst__> example("registration_model", package = "spatialLIBD")
#> 
#> rgstr_> example("registration_pseudobulk", package = "spatialLIBD")
#> 
#> rgstr_> ## Ensure reproducibility of example data
#> rgstr_> set.seed(20220907)
#> 
#> rgstr_> ## Generate example data
#> rgstr_> sce <- scuttle::mockSCE()
#> 
#> rgstr_> ## Add some sample IDs
#> rgstr_> sce$sample_id <- sample(LETTERS[1:5], ncol(sce), replace = TRUE)
#> 
#> rgstr_> ## Add a sample-level covariate: age
#> rgstr_> ages <- rnorm(5, mean = 20, sd = 4)
#> 
#> rgstr_> names(ages) <- LETTERS[1:5]
#> 
#> rgstr_> sce$age <- ages[sce$sample_id]
#> 
#> rgstr_> ## Add gene-level information
#> rgstr_> rowData(sce)$ensembl <- paste0("ENSG", seq_len(nrow(sce)))
#> 
#> rgstr_> rowData(sce)$gene_name <- paste0("gene", seq_len(nrow(sce)))
#> 
#> rgstr_> ## Pseudo-bulk
#> rgstr_> sce_pseudo <- registration_pseudobulk(sce, "Cell_Cycle", "sample_id", c("age"), min_ncells = NULL)
#> 2024-07-26 23:49:09.351497 make pseudobulk object
#> 2024-07-26 23:49:09.508425 drop lowly expressed genes
#> 2024-07-26 23:49:09.563072 normalize expression
#> 
#> rgstr_> colData(sce_pseudo)
#> DataFrame with 20 rows and 8 columns
#>      Mutation_Status  Cell_Cycle   Treatment   sample_id       age
#>          <character> <character> <character> <character> <numeric>
#> A_G0              NA          G0          NA           A   19.1872
#> B_G0              NA          G0          NA           B   25.3496
#> C_G0              NA          G0          NA           C   24.1802
#> D_G0              NA          G0          NA           D   15.5211
#> E_G0              NA          G0          NA           E   20.9701
#> ...              ...         ...         ...         ...       ...
#> A_S               NA           S          NA           A   19.1872
#> B_S               NA           S          NA           B   25.3496
#> C_S               NA           S          NA           C   24.1802
#> D_S               NA           S          NA           D   15.5211
#> E_S               NA           S          NA           E   20.9701
#>      registration_variable registration_sample_id    ncells
#>                <character>            <character> <integer>
#> A_G0                    G0                      A         8
#> B_G0                    G0                      B        13
#> C_G0                    G0                      C         9
#> D_G0                    G0                      D         7
#> E_G0                    G0                      E        10
#> ...                    ...                    ...       ...
#> A_S                      S                      A        12
#> B_S                      S                      B         8
#> C_S                      S                      C         7
#> D_S                      S                      D        14
#> E_S                      S                      E        11
#> 
#> rgstr_> registration_mod <- registration_model(sce_pseudo, "age")
#> 2024-07-26 23:49:09.635598 create model matrix
#> 
#> rgstr_> head(registration_mod)
#>      registration_variableG0 registration_variableG1 registration_variableG2M
#> A_G0                       1                       0                        0
#> B_G0                       1                       0                        0
#> C_G0                       1                       0                        0
#> D_G0                       1                       0                        0
#> E_G0                       1                       0                        0
#> A_G1                       0                       1                        0
#>      registration_variableS      age
#> A_G0                      0 19.18719
#> B_G0                      0 25.34965
#> C_G0                      0 24.18019
#> D_G0                      0 15.52107
#> E_G0                      0 20.97006
#> A_G1                      0 19.18719
#> 
#> rgst__> block_cor <- registration_block_cor(sce_pseudo, registration_mod)
#> 2024-07-26 23:49:09.646796 run duplicateCorrelation()
#> 2024-07-26 23:49:10.950542 The estimated correlation is: -0.0187869166526901
results_enrichment <- registration_stats_enrichment(sce_pseudo,
    block_cor, "age",
    gene_ensembl = "ensembl", gene_name = "gene_name"
)
#> 2024-07-26 23:49:10.95432 computing enrichment statistics
#> 2024-07-26 23:49:11.070725 extract and reformat enrichment results
head(results_enrichment)
#>            t_stat_G0  t_stat_G1 t_stat_G2M   t_stat_S p_value_G0 p_value_G1
#> Gene_0001  0.1482017  0.5610669 -0.3612235 -0.3458508 0.88374480 0.58130631
#> Gene_0002  1.1913621 -0.4218015  0.1861521 -0.9362781 0.24817889 0.67790173
#> Gene_0003  0.3911563 -0.1708744 -1.1308523  0.8936891 0.70003266 0.86612822
#> Gene_0004 -0.2261922  0.7745193 -0.3413959 -0.1966017 0.82346696 0.44815103
#> Gene_0005 -2.8506769  0.0763176  1.3501762  1.0489289 0.01022576 0.93996395
#> Gene_0006  0.6567980 -2.0933725  1.9447842 -0.5422089 0.51918624 0.04995892
#>           p_value_G2M p_value_S    fdr_G0    fdr_G1   fdr_G2M     fdr_S
#> Gene_0001   0.7219173 0.7332538 0.9877448 0.9964804 0.9852245 0.9874654
#> Gene_0002   0.8542983 0.3608759 0.9610612 0.9964804 0.9852245 0.9419020
#> Gene_0003   0.2721883 0.3826660 0.9875616 0.9964804 0.9380488 0.9419020
#> Gene_0004   0.7365517 0.8462264 0.9875616 0.9964804 0.9852245 0.9874654
#> Gene_0005   0.1928228 0.3073674 0.8705766 0.9964804 0.9327794 0.9354977
#> Gene_0006   0.0667520 0.5939759 0.9715681 0.9964804 0.9059695 0.9745520
#>             logFC_G0    logFC_G1  logFC_G2M    logFC_S ensembl  gene
#> Gene_0001  0.0714226  0.26841193 -0.1735825 -0.1662520   ENSG1 gene1
#> Gene_0002  1.1391547 -0.41614757  0.1843559 -0.9073630   ENSG2 gene2
#> Gene_0003  0.0746434 -0.03279529 -0.2096408  0.1677927   ENSG3 gene3
#> Gene_0004 -0.1509837  0.50974778 -0.2274905 -0.1312736   ENSG4 gene4
#> Gene_0005 -2.2969018  0.07345877  1.2416624  0.9817806   ENSG5 gene5
#> Gene_0006  0.3707513 -1.07744072  1.0138450 -0.3071555   ENSG6 gene6

## Specifying `block_cor = NaN` ignores the correlation structure
results_enrichment_nan <- registration_stats_enrichment(sce_pseudo,
    block_cor = NaN, "age",
    gene_ensembl = "ensembl", gene_name = "gene_name"
)
#> 2024-07-26 23:49:11.10365 computing enrichment statistics
#> 2024-07-26 23:49:11.206738 extract and reformat enrichment results
head(results_enrichment_nan)
#>            t_stat_G0   t_stat_G1 t_stat_G2M   t_stat_S p_value_G0 p_value_G1
#> Gene_0001  0.1497747  0.56711100 -0.3650801 -0.3495404 0.88252049 0.57727506
#> Gene_0002  1.2045227 -0.42617266  0.1880666 -0.9463404 0.24317510 0.67476888
#> Gene_0003  0.3947677 -0.17243577 -1.1419417  0.9022243 0.69740991 0.86491760
#> Gene_0004 -0.2301639  0.78851532 -0.3474114 -0.2000511 0.82042384 0.44011889
#> Gene_0005 -2.8587641  0.07646923  1.3531129  1.0511322 0.01004623 0.93984490
#> Gene_0006  0.6606524 -2.10817213  1.9581833 -0.5453678 0.51676445 0.04851534
#>           p_value_G2M p_value_S    fdr_G0    fdr_G1   fdr_G2M     fdr_S
#> Gene_0001  0.71908334 0.7305269 0.9869999 0.9961051 0.9838800 0.9869894
#> Gene_0002  0.85281819 0.3558526 0.9512827 0.9961051 0.9838800 0.9361872
#> Gene_0003  0.26766387 0.3782304 0.9863868 0.9961051 0.9269203 0.9361872
#> Gene_0004  0.73210050 0.8435656 0.9863868 0.9961051 0.9838800 0.9869894
#> Gene_0005  0.19189934 0.3063803 0.8538025 0.9961051 0.9167840 0.9269610
#> Gene_0006  0.06505398 0.5918443 0.9659963 0.9961051 0.8763496 0.9719602
#>             logFC_G0    logFC_G1  logFC_G2M    logFC_S ensembl  gene
#> Gene_0001  0.0714226  0.26841193 -0.1735825 -0.1662520   ENSG1 gene1
#> Gene_0002  1.1391547 -0.41614757  0.1843559 -0.9073630   ENSG2 gene2
#> Gene_0003  0.0746434 -0.03279529 -0.2096408  0.1677927   ENSG3 gene3
#> Gene_0004 -0.1509837  0.50974778 -0.2274905 -0.1312736   ENSG4 gene4
#> Gene_0005 -2.2969018  0.07345877  1.2416624  0.9817806   ENSG5 gene5
#> Gene_0006  0.3707513 -1.07744072  1.0138450 -0.3071555   ENSG6 gene6