R/registration_stats_pairwise.R
registration_stats_pairwise.Rd
This function computes the gene pairwise t-statistics (one group > another,
for all combinations). These t-statistics can be used for spatial
registration with layer_stat_cor()
and related functions. Although, they
are more typically used for identifying pairwise-marker genes.
registration_stats_pairwise(
sce_pseudo,
registration_model,
block_cor,
var_registration = "registration_variable",
var_sample_id = "registration_sample_id",
gene_ensembl = NULL,
gene_name = NULL
)
The output of registration_pseudobulk()
.
The output from registration_model()
.
A numeric(1)
computed with registration_block_cor()
.
A character(1)
specifying the colData(sce_pseudo)
variable of interest against which will be used for computing the relevant
statistics.
A character(1)
specifying the colData(sce_pseudo)
variable with the sample ID.
A character(1)
specifying the rowData(sce_pseudo)
column with the ENSEMBL gene IDs. This will be used by layer_stat_cor()
.
A character(1)
specifying the rowData(sce_pseudo)
column with the gene names (symbols).
A data.frame()
with the pairwise statistical results. This is
similar to fetch_data("modeling_results")$pairwise
.
Other spatial registration and statistical modeling functions:
registration_block_cor()
,
registration_model()
,
registration_pseudobulk()
,
registration_stats_anova()
,
registration_stats_enrichment()
,
registration_wrapper()
example("registration_block_cor", package = "spatialLIBD")
#>
#> rgst__> example("registration_model", package = "spatialLIBD")
#>
#> rgstr_> example("registration_pseudobulk", package = "spatialLIBD")
#>
#> rgstr_> ## Ensure reproducibility of example data
#> rgstr_> set.seed(20220907)
#>
#> rgstr_> ## Generate example data
#> rgstr_> sce <- scuttle::mockSCE()
#>
#> rgstr_> ## Add some sample IDs
#> rgstr_> sce$sample_id <- sample(LETTERS[1:5], ncol(sce), replace = TRUE)
#>
#> rgstr_> ## Add a sample-level covariate: age
#> rgstr_> ages <- rnorm(5, mean = 20, sd = 4)
#>
#> rgstr_> names(ages) <- LETTERS[1:5]
#>
#> rgstr_> sce$age <- ages[sce$sample_id]
#>
#> rgstr_> ## Add gene-level information
#> rgstr_> rowData(sce)$ensembl <- paste0("ENSG", seq_len(nrow(sce)))
#>
#> rgstr_> rowData(sce)$gene_name <- paste0("gene", seq_len(nrow(sce)))
#>
#> rgstr_> ## Pseudo-bulk
#> rgstr_> sce_pseudo <- registration_pseudobulk(sce, "Cell_Cycle", "sample_id", c("age"), min_ncells = NULL)
#> 2024-12-13 19:42:00.217931 make pseudobulk object
#> 2024-12-13 19:42:00.316147 drop lowly expressed genes
#> 2024-12-13 19:42:00.362556 normalize expression
#>
#> rgstr_> colData(sce_pseudo)
#> DataFrame with 20 rows and 8 columns
#> Mutation_Status Cell_Cycle Treatment sample_id age
#> <character> <character> <character> <character> <numeric>
#> A_G0 NA G0 NA A 19.1872
#> B_G0 NA G0 NA B 25.3496
#> C_G0 NA G0 NA C 24.1802
#> D_G0 NA G0 NA D 15.5211
#> E_G0 NA G0 NA E 20.9701
#> ... ... ... ... ... ...
#> A_S NA S NA A 19.1872
#> B_S NA S NA B 25.3496
#> C_S NA S NA C 24.1802
#> D_S NA S NA D 15.5211
#> E_S NA S NA E 20.9701
#> registration_variable registration_sample_id ncells
#> <character> <character> <integer>
#> A_G0 G0 A 8
#> B_G0 G0 B 13
#> C_G0 G0 C 9
#> D_G0 G0 D 7
#> E_G0 G0 E 10
#> ... ... ... ...
#> A_S S A 12
#> B_S S B 8
#> C_S S C 7
#> D_S S D 14
#> E_S S E 11
#>
#> rgstr_> registration_mod <- registration_model(sce_pseudo, "age")
#> 2024-12-13 19:42:00.438231 create model matrix
#>
#> rgstr_> head(registration_mod)
#> registration_variableG0 registration_variableG1 registration_variableG2M
#> A_G0 1 0 0
#> B_G0 1 0 0
#> C_G0 1 0 0
#> D_G0 1 0 0
#> E_G0 1 0 0
#> A_G1 0 1 0
#> registration_variableS age
#> A_G0 0 19.18719
#> B_G0 0 25.34965
#> C_G0 0 24.18019
#> D_G0 0 15.52107
#> E_G0 0 20.97006
#> A_G1 0 19.18719
#>
#> rgst__> block_cor <- registration_block_cor(sce_pseudo, registration_mod)
#> 2024-12-13 19:42:00.449399 run duplicateCorrelation()
#> 2024-12-13 19:42:01.716408 The estimated correlation is: -0.0187869166526901
results_pairwise <- registration_stats_pairwise(sce_pseudo,
registration_mod, block_cor,
gene_ensembl = "ensembl", gene_name = "gene_name"
)
#> 2024-12-13 19:42:01.718282 running the baseline pairwise model
#> 2024-12-13 19:42:01.735675 computing pairwise statistics
head(results_pairwise)
#> t_stat_G0-G1 t_stat_G0-G2M t_stat_G0-S t_stat_G1-G2M t_stat_G1-S
#> Gene_0001 -0.2393683 0.29771391 0.28880637 0.5370822 0.5281747
#> Gene_0002 0.9547055 0.58609293 1.25623276 -0.3686126 0.3015273
#> Gene_0003 0.3389469 0.89685781 -0.29386667 0.5579109 -0.6328135
#> Gene_0004 -0.5817335 0.06735952 -0.01735353 0.6490930 0.5643800
#> Gene_0005 -1.7445717 -2.60436287 -2.41309134 -0.8597912 -0.6685196
#> Gene_0006 1.7476962 -0.77609353 0.81810637 -2.5237898 -0.9295898
#> t_stat_G2M-S p_value_G0-G1 p_value_G0-G2M p_value_G0-S p_value_G1-G2M
#> Gene_0001 -0.008907535 0.81368047 0.76952707 0.77621965 0.59816691
#> Gene_0002 0.670139826 0.35309875 0.56550988 0.22601193 0.71696428
#> Gene_0003 -1.190724479 0.73879812 0.38231306 0.77241539 0.58417423
#> Gene_0004 -0.084713052 0.56837654 0.94708094 0.98635651 0.52494641
#> Gene_0005 0.191271534 0.09909959 0.01850622 0.02738372 0.40185996
#> Gene_0006 1.594199902 0.09854299 0.44835125 0.42460931 0.02184936
#> p_value_G1-S p_value_G2M-S fdr_G0-G1 fdr_G0-G2M fdr_G0-S fdr_G1-G2M
#> Gene_0001 0.6042013 0.9929965 0.9971936 0.9833020 0.9800365 0.9949522
#> Gene_0002 0.7666676 0.5117648 0.9971936 0.9612205 0.9224977 0.9949522
#> Gene_0003 0.5352709 0.2501141 0.9971936 0.9493926 0.9783602 0.9949522
#> Gene_0004 0.5798624 0.9334786 0.9971936 0.9958273 0.9956820 0.9949522
#> Gene_0005 0.5127728 0.8505774 0.9971936 0.7217980 0.8946073 0.9949522
#> Gene_0006 0.3655890 0.1293024 0.9971936 0.9561207 0.9698559 0.9949522
#> fdr_G1-S fdr_G2M-S logFC_G0-G1 logFC_G0-G2M logFC_G0-S logFC_G1-G2M
#> Gene_0001 0.995513 0.9998989 -0.14774200 0.18375383 0.17825596 0.3314958
#> Gene_0002 0.995513 0.9727830 1.16647669 0.71609911 1.53488825 -0.4503776
#> Gene_0003 0.995513 0.9392457 0.08057902 0.21321313 -0.06986195 0.1326341
#> Gene_0004 0.995513 0.9987686 -0.49554861 0.05738008 -0.01478258 0.5529287
#> Gene_0005 0.995513 0.9882374 -1.77777042 -2.65392316 -2.45901178 -0.8761527
#> Gene_0006 0.995513 0.9374452 1.08614399 -0.48232028 0.50843007 -1.5684643
#> logFC_G1-S logFC_G2M-S ensembl gene
#> Gene_0001 0.3259980 -0.005497874 ENSG1 gene1
#> Gene_0002 0.3684116 0.818789145 ENSG2 gene2
#> Gene_0003 -0.1504410 -0.283075078 ENSG3 gene3
#> Gene_0004 0.4807660 -0.072162656 ENSG4 gene4
#> Gene_0005 -0.6812414 0.194911377 ENSG5 gene5
#> Gene_0006 -0.5777139 0.990750347 ENSG6 gene6
## Specifying `block_cor = NaN` then ignores the correlation structure
results_pairwise_nan <- registration_stats_pairwise(sce_pseudo,
registration_mod,
block_cor = NaN,
gene_ensembl = "ensembl", gene_name = "gene_name"
)
#> 2024-12-13 19:42:01.803237 running the baseline pairwise model
#> 2024-12-13 19:42:01.821646 computing pairwise statistics
head(results_pairwise_nan)
#> t_stat_G0-G1 t_stat_G0-G2M t_stat_G0-S t_stat_G1-G2M t_stat_G1-S
#> Gene_0001 -0.2419770 0.30095840 0.29195379 0.5429354 0.5339308
#> Gene_0002 0.9655460 0.59274792 1.27049707 -0.3727981 0.3049511
#> Gene_0003 0.3424456 0.90611560 -0.29690010 0.5636700 -0.6393457
#> Gene_0004 -0.5922676 0.06857927 -0.01766777 0.6608469 0.5745998
#> Gene_0005 -1.7497882 -2.61215029 -2.42030683 -0.8623621 -0.6705186
#> Gene_0006 1.7620062 -0.78244810 0.82480493 -2.5444543 -0.9372012
#> t_stat_G2M-S p_value_G0-G1 p_value_G0-G2M p_value_G0-S p_value_G1-G2M
#> Gene_0001 -0.009004609 0.81169124 0.76709393 0.77385274 0.59421797
#> Gene_0002 0.677749150 0.34779939 0.56114829 0.22101069 0.71390337
#> Gene_0003 -1.203015701 0.73621058 0.37753160 0.77013771 0.58033474
#> Gene_0004 -0.086247043 0.56146250 0.94612425 0.98610948 0.51756162
#> Gene_0005 0.191843463 0.09817164 0.01821015 0.02698596 0.40048330
#> Gene_0006 1.607253036 0.09602824 0.44470810 0.42089899 0.02094098
#> p_value_G1-S p_value_G2M-S fdr_G0-G1 fdr_G0-G2M fdr_G0-S fdr_G1-G2M
#> Gene_0001 0.6002984 0.9929202 0.9971067 0.9806277 0.9782050 0.9932537
#> Gene_0002 0.7641032 0.5070458 0.9971067 0.9557062 0.9058626 0.9932537
#> Gene_0003 0.5311147 0.2454479 0.9971067 0.9380457 0.9754753 0.9932537
#> Gene_0004 0.5730837 0.9322771 0.9971067 0.9957322 0.9955379 0.9932537
#> Gene_0005 0.5115293 0.8501363 0.9971067 0.6854563 0.8734187 0.9932537
#> Gene_0006 0.3617722 0.1264013 0.9971067 0.9508593 0.9618345 0.9932537
#> fdr_G1-S fdr_G2M-S logFC_G0-G1 logFC_G0-G2M logFC_G0-S logFC_G1-G2M
#> Gene_0001 0.9942711 0.9998953 -0.14774200 0.18375383 0.17825596 0.3314958
#> Gene_0002 0.9942711 0.9658549 1.16647669 0.71609911 1.53488825 -0.4503776
#> Gene_0003 0.9942711 0.9234394 0.08057902 0.21321313 -0.06986195 0.1326341
#> Gene_0004 0.9942711 0.9978377 -0.49554861 0.05738008 -0.01478258 0.5529287
#> Gene_0005 0.9942711 0.9864989 -1.77777042 -2.65392316 -2.45901178 -0.8761527
#> Gene_0006 0.9942711 0.9218958 1.08614399 -0.48232028 0.50843007 -1.5684643
#> logFC_G1-S logFC_G2M-S ensembl gene
#> Gene_0001 0.3259980 -0.005497874 ENSG1 gene1
#> Gene_0002 0.3684116 0.818789145 ENSG2 gene2
#> Gene_0003 -0.1504410 -0.283075078 ENSG3 gene3
#> Gene_0004 0.4807660 -0.072162656 ENSG4 gene4
#> Gene_0005 -0.6812414 0.194911377 ENSG5 gene5
#> Gene_0006 -0.5777139 0.990750347 ENSG6 gene6