R/registration_block_cor.R
registration_block_cor.Rd
This function computes the block correlation using the sample ID as the blocking factor. Blocking on the sample ID accounts for the sample-level batch effect shared by cells in scRNA-seq data, or by spots in spatially-resolved transcriptomics data from Visium (or similar platforms).
registration_block_cor(
sce_pseudo,
registration_model,
var_sample_id = "registration_sample_id"
)
sce_pseudo: The output of registration_pseudobulk().
registration_model: The output from registration_model().
var_sample_id: A character(1) specifying the colData(sce_pseudo) variable with the sample ID.
A numeric(1) with the block correlation at the sample ID level.
Other spatial registration and statistical modeling functions: registration_model(), registration_pseudobulk(), registration_stats_anova(), registration_stats_enrichment(), registration_stats_pairwise(), registration_wrapper()
example("registration_model", package = "spatialLIBD")
#>
#> rgstr_> example("registration_pseudobulk", package = "spatialLIBD")
#>
#> rgstr_> ## Ensure reproducibility of example data
#> rgstr_> set.seed(20220907)
#>
#> rgstr_> ## Generate example data
#> rgstr_> sce <- scuttle::mockSCE()
#>
#> rgstr_> ## Add some sample IDs
#> rgstr_> sce$sample_id <- sample(LETTERS[1:5], ncol(sce), replace = TRUE)
#>
#> rgstr_> ## Add a sample-level covariate: age
#> rgstr_> ages <- rnorm(5, mean = 20, sd = 4)
#>
#> rgstr_> names(ages) <- LETTERS[1:5]
#>
#> rgstr_> sce$age <- ages[sce$sample_id]
#>
#> rgstr_> ## Add gene-level information
#> rgstr_> rowData(sce)$ensembl <- paste0("ENSG", seq_len(nrow(sce)))
#>
#> rgstr_> rowData(sce)$gene_name <- paste0("gene", seq_len(nrow(sce)))
#>
#> rgstr_> ## Pseudo-bulk
#> rgstr_> sce_pseudo <- registration_pseudobulk(sce, "Cell_Cycle", "sample_id", c("age"), min_ncells = NULL)
#> 2024-12-16 21:52:02.262848 make pseudobulk object
#> 2024-12-16 21:52:02.385851 drop lowly expressed genes
#> 2024-12-16 21:52:02.445989 normalize expression
#>
#> rgstr_> colData(sce_pseudo)
#> DataFrame with 20 rows and 8 columns
#> Mutation_Status Cell_Cycle Treatment sample_id age
#> <character> <character> <character> <character> <numeric>
#> A_G0 NA G0 NA A 19.1872
#> B_G0 NA G0 NA B 25.3496
#> C_G0 NA G0 NA C 24.1802
#> D_G0 NA G0 NA D 15.5211
#> E_G0 NA G0 NA E 20.9701
#> ... ... ... ... ... ...
#> A_S NA S NA A 19.1872
#> B_S NA S NA B 25.3496
#> C_S NA S NA C 24.1802
#> D_S NA S NA D 15.5211
#> E_S NA S NA E 20.9701
#> registration_variable registration_sample_id ncells
#> <character> <character> <integer>
#> A_G0 G0 A 8
#> B_G0 G0 B 13
#> C_G0 G0 C 9
#> D_G0 G0 D 7
#> E_G0 G0 E 10
#> ... ... ... ...
#> A_S S A 12
#> B_S S B 8
#> C_S S C 7
#> D_S S D 14
#> E_S S E 11
#>
#> rgstr_> registration_mod <- registration_model(sce_pseudo, "age")
#> 2024-12-16 21:52:02.526203 create model matrix
#>
#> rgstr_> head(registration_mod)
#> registration_variableG0 registration_variableG1 registration_variableG2M
#> A_G0 1 0 0
#> B_G0 1 0 0
#> C_G0 1 0 0
#> D_G0 1 0 0
#> E_G0 1 0 0
#> A_G1 0 1 0
#> registration_variableS age
#> A_G0 0 19.18719
#> B_G0 0 25.34965
#> C_G0 0 24.18019
#> D_G0 0 15.52107
#> E_G0 0 20.97006
#> A_G1 0 19.18719
block_cor <- registration_block_cor(sce_pseudo, registration_mod)
#> 2024-12-16 21:52:02.53883 run duplicateCorrelation()
#> 2024-12-16 21:52:03.832235 The estimated correlation is: -0.0187869166526901