Spatial registration: compute pairwise statistics

This function computes the gene pairwise t-statistics (one group > another, for all combinations). These t-statistics can be used for spatial registration with layer_stat_cor() and related functions. Although, they are more typically used for identifying pairwise-marker genes.

registration_stats_pairwise(
  sce_pseudo,
  registration_model,
  block_cor,
  var_registration = "registration_variable",
  var_sample_id = "registration_sample_id",
  gene_ensembl = NULL,
  gene_name = NULL
)

Arguments

sce_pseudo: The output of registration_pseudobulk().
registration_model: The output from registration_model().
block_cor: A numeric(1) computed with registration_block_cor().
var_registration: A character(1) specifying the colData(sce_pseudo) variable of interest against which will be used for computing the relevant statistics.
var_sample_id: A character(1) specifying the colData(sce_pseudo) variable with the sample ID.
gene_ensembl: A character(1) specifying the rowData(sce_pseudo) column with the ENSEMBL gene IDs. This will be used by layer_stat_cor().
gene_name: A character(1) specifying the rowData(sce_pseudo) column with the gene names (symbols).

Value

A data.frame() with the pairwise statistical results. This is similar to fetch_data("modeling_results")$pairwise.

Examples

example("registration_block_cor", package = "spatialLIBD")
#> 
#> rgst__> example("registration_model", package = "spatialLIBD")
#> 
#> rgstr_> example("registration_pseudobulk", package = "spatialLIBD")
#> 
#> rgstr_> ## Ensure reproducibility of example data
#> rgstr_> set.seed(20220907)
#> 
#> rgstr_> ## Generate example data
#> rgstr_> sce <- scuttle::mockSCE()
#> 
#> rgstr_> ## Add some sample IDs
#> rgstr_> sce$sample_id <- sample(LETTERS[1:5], ncol(sce), replace = TRUE)
#> 
#> rgstr_> ## Add a sample-level covariate: age
#> rgstr_> ages <- rnorm(5, mean = 20, sd = 4)
#> 
#> rgstr_> names(ages) <- LETTERS[1:5]
#> 
#> rgstr_> sce$age <- ages[sce$sample_id]
#> 
#> rgstr_> ## Add gene-level information
#> rgstr_> rowData(sce)$ensembl <- paste0("ENSG", seq_len(nrow(sce)))
#> 
#> rgstr_> rowData(sce)$gene_name <- paste0("gene", seq_len(nrow(sce)))
#> 
#> rgstr_> ## Pseudo-bulk by Cell Cycle
#> rgstr_> sce_pseudo <- registration_pseudobulk(
#> rgstr_+     sce,
#> rgstr_+     var_registration = "Cell_Cycle",
#> rgstr_+     var_sample_id = "sample_id",
#> rgstr_+     covars = c("age"),
#> rgstr_+     min_ncells = NULL
#> rgstr_+ )
#> 2025-05-09 16:31:18.106542 make pseudobulk object
#> 2025-05-09 16:31:18.225625 drop lowly expressed genes
#> 2025-05-09 16:31:18.273428 normalize expression
#> 
#> rgstr_> colData(sce_pseudo)
#> DataFrame with 20 rows and 9 columns
#>      Mutation_Status  Cell_Cycle   Treatment   sample_id       age
#>          <character> <character> <character> <character> <numeric>
#> A_G0              NA          G0          NA           A   19.1872
#> B_G0              NA          G0          NA           B   25.3496
#> C_G0              NA          G0          NA           C   24.1802
#> D_G0              NA          G0          NA           D   15.5211
#> E_G0              NA          G0          NA           E   20.9701
#> ...              ...         ...         ...         ...       ...
#> A_S               NA           S          NA           A   19.1872
#> B_S               NA           S          NA           B   25.3496
#> C_S               NA           S          NA           C   24.1802
#> D_S               NA           S          NA           D   15.5211
#> E_S               NA           S          NA           E   20.9701
#>      registration_variable registration_sample_id    ncells pseudo_sum_umi
#>                <character>            <character> <integer>      <numeric>
#> A_G0                    G0                      A         8        2946915
#> B_G0                    G0                      B        13        4922867
#> C_G0                    G0                      C         9        3398888
#> D_G0                    G0                      D         7        2630651
#> E_G0                    G0                      E        10        3761710
#> ...                    ...                    ...       ...            ...
#> A_S                      S                      A        12        4516334
#> B_S                      S                      B         8        2960685
#> C_S                      S                      C         7        2595774
#> D_S                      S                      D        14        5233560
#> E_S                      S                      E        11        4151818
#> 
#> rgstr_> registration_mod <- registration_model(sce_pseudo, "age")
#> 2025-05-09 16:31:18.40166 create model matrix
#> 
#> rgstr_> head(registration_mod)
#>      registration_variableG0 registration_variableG1 registration_variableG2M
#> A_G0                       1                       0                        0
#> B_G0                       1                       0                        0
#> C_G0                       1                       0                        0
#> D_G0                       1                       0                        0
#> E_G0                       1                       0                        0
#> A_G1                       0                       1                        0
#>      registration_variableS      age
#> A_G0                      0 19.18719
#> B_G0                      0 25.34965
#> C_G0                      0 24.18019
#> D_G0                      0 15.52107
#> E_G0                      0 20.97006
#> A_G1                      0 19.18719
#> 
#> rgst__> block_cor <- registration_block_cor(sce_pseudo, registration_mod)
#> 2025-05-09 16:31:18.414462 run duplicateCorrelation()
#> 2025-05-09 16:31:19.706942 The estimated correlation is: -0.0187869166526901
results_pairwise <- registration_stats_pairwise(sce_pseudo,
    registration_mod, block_cor,
    gene_ensembl = "ensembl", gene_name = "gene_name"
)
#> 2025-05-09 16:31:19.708846 running the baseline pairwise model
#> 2025-05-09 16:31:19.726184 computing pairwise statistics
head(results_pairwise)
#>           t_stat_G0-G1 t_stat_G0-G2M t_stat_G0-S t_stat_G1-G2M t_stat_G1-S
#> Gene_0001   -0.2393683    0.29771391  0.28880637     0.5370822   0.5281747
#> Gene_0002    0.9547055    0.58609293  1.25623276    -0.3686126   0.3015273
#> Gene_0003    0.3389469    0.89685781 -0.29386667     0.5579109  -0.6328135
#> Gene_0004   -0.5817335    0.06735952 -0.01735353     0.6490930   0.5643800
#> Gene_0005   -1.7445717   -2.60436287 -2.41309134    -0.8597912  -0.6685196
#> Gene_0006    1.7476962   -0.77609353  0.81810637    -2.5237898  -0.9295898
#>           t_stat_G2M-S p_value_G0-G1 p_value_G0-G2M p_value_G0-S p_value_G1-G2M
#> Gene_0001 -0.008907535    0.81368047     0.76952707   0.77621965     0.59816691
#> Gene_0002  0.670139826    0.35309875     0.56550988   0.22601193     0.71696428
#> Gene_0003 -1.190724479    0.73879812     0.38231306   0.77241539     0.58417423
#> Gene_0004 -0.084713052    0.56837654     0.94708094   0.98635651     0.52494641
#> Gene_0005  0.191271534    0.09909959     0.01850622   0.02738372     0.40185996
#> Gene_0006  1.594199902    0.09854299     0.44835125   0.42460931     0.02184936
#>           p_value_G1-S p_value_G2M-S fdr_G0-G1 fdr_G0-G2M  fdr_G0-S fdr_G1-G2M
#> Gene_0001    0.6042013     0.9929965 0.9971936  0.9833020 0.9800365  0.9949522
#> Gene_0002    0.7666676     0.5117648 0.9971936  0.9612205 0.9224977  0.9949522
#> Gene_0003    0.5352709     0.2501141 0.9971936  0.9493926 0.9783602  0.9949522
#> Gene_0004    0.5798624     0.9334786 0.9971936  0.9958273 0.9956820  0.9949522
#> Gene_0005    0.5127728     0.8505774 0.9971936  0.7217980 0.8946073  0.9949522
#> Gene_0006    0.3655890     0.1293024 0.9971936  0.9561207 0.9698559  0.9949522
#>           fdr_G1-S fdr_G2M-S logFC_G0-G1 logFC_G0-G2M  logFC_G0-S logFC_G1-G2M
#> Gene_0001 0.995513 0.9998989 -0.14774200   0.18375383  0.17825596    0.3314958
#> Gene_0002 0.995513 0.9727830  1.16647669   0.71609911  1.53488825   -0.4503776
#> Gene_0003 0.995513 0.9392457  0.08057902   0.21321313 -0.06986195    0.1326341
#> Gene_0004 0.995513 0.9987686 -0.49554861   0.05738008 -0.01478258    0.5529287
#> Gene_0005 0.995513 0.9882374 -1.77777042  -2.65392316 -2.45901178   -0.8761527
#> Gene_0006 0.995513 0.9374452  1.08614399  -0.48232028  0.50843007   -1.5684643
#>           logFC_G1-S  logFC_G2M-S ensembl  gene
#> Gene_0001  0.3259980 -0.005497874   ENSG1 gene1
#> Gene_0002  0.3684116  0.818789145   ENSG2 gene2
#> Gene_0003 -0.1504410 -0.283075078   ENSG3 gene3
#> Gene_0004  0.4807660 -0.072162656   ENSG4 gene4
#> Gene_0005 -0.6812414  0.194911377   ENSG5 gene5
#> Gene_0006 -0.5777139  0.990750347   ENSG6 gene6

## Specifying `block_cor = NaN` then ignores the correlation structure
results_pairwise_nan <- registration_stats_pairwise(sce_pseudo,
    registration_mod,
    block_cor = NaN,
    gene_ensembl = "ensembl", gene_name = "gene_name"
)
#> 2025-05-09 16:31:19.791678 running the baseline pairwise model
#> 2025-05-09 16:31:19.809705 computing pairwise statistics
head(results_pairwise_nan)
#>           t_stat_G0-G1 t_stat_G0-G2M t_stat_G0-S t_stat_G1-G2M t_stat_G1-S
#> Gene_0001   -0.2419770    0.30095840  0.29195379     0.5429354   0.5339308
#> Gene_0002    0.9655460    0.59274792  1.27049707    -0.3727981   0.3049511
#> Gene_0003    0.3424456    0.90611560 -0.29690010     0.5636700  -0.6393457
#> Gene_0004   -0.5922676    0.06857927 -0.01766777     0.6608469   0.5745998
#> Gene_0005   -1.7497882   -2.61215029 -2.42030683    -0.8623621  -0.6705186
#> Gene_0006    1.7620062   -0.78244810  0.82480493    -2.5444543  -0.9372012
#>           t_stat_G2M-S p_value_G0-G1 p_value_G0-G2M p_value_G0-S p_value_G1-G2M
#> Gene_0001 -0.009004609    0.81169124     0.76709393   0.77385274     0.59421797
#> Gene_0002  0.677749150    0.34779939     0.56114829   0.22101069     0.71390337
#> Gene_0003 -1.203015701    0.73621058     0.37753160   0.77013771     0.58033474
#> Gene_0004 -0.086247043    0.56146250     0.94612425   0.98610948     0.51756162
#> Gene_0005  0.191843463    0.09817164     0.01821015   0.02698596     0.40048330
#> Gene_0006  1.607253036    0.09602824     0.44470810   0.42089899     0.02094098
#>           p_value_G1-S p_value_G2M-S fdr_G0-G1 fdr_G0-G2M  fdr_G0-S fdr_G1-G2M
#> Gene_0001    0.6002984     0.9929202 0.9971067  0.9806277 0.9782050  0.9932537
#> Gene_0002    0.7641032     0.5070458 0.9971067  0.9557062 0.9058626  0.9932537
#> Gene_0003    0.5311147     0.2454479 0.9971067  0.9380457 0.9754753  0.9932537
#> Gene_0004    0.5730837     0.9322771 0.9971067  0.9957322 0.9955379  0.9932537
#> Gene_0005    0.5115293     0.8501363 0.9971067  0.6854563 0.8734187  0.9932537
#> Gene_0006    0.3617722     0.1264013 0.9971067  0.9508593 0.9618345  0.9932537
#>            fdr_G1-S fdr_G2M-S logFC_G0-G1 logFC_G0-G2M  logFC_G0-S logFC_G1-G2M
#> Gene_0001 0.9942711 0.9998953 -0.14774200   0.18375383  0.17825596    0.3314958
#> Gene_0002 0.9942711 0.9658549  1.16647669   0.71609911  1.53488825   -0.4503776
#> Gene_0003 0.9942711 0.9234394  0.08057902   0.21321313 -0.06986195    0.1326341
#> Gene_0004 0.9942711 0.9978377 -0.49554861   0.05738008 -0.01478258    0.5529287
#> Gene_0005 0.9942711 0.9864989 -1.77777042  -2.65392316 -2.45901178   -0.8761527
#> Gene_0006 0.9942711 0.9218958  1.08614399  -0.48232028  0.50843007   -1.5684643
#>           logFC_G1-S  logFC_G2M-S ensembl  gene
#> Gene_0001  0.3259980 -0.005497874   ENSG1 gene1
#> Gene_0002  0.3684116  0.818789145   ENSG2 gene2
#> Gene_0003 -0.1504410 -0.283075078   ENSG3 gene3
#> Gene_0004  0.4807660 -0.072162656   ENSG4 gene4
#> Gene_0005 -0.6812414  0.194911377   ENSG5 gene5
#> Gene_0006 -0.5777139  0.990750347   ENSG6 gene6

Arguments

Value

See also

Examples