Spatial registration: block correlation

This function computes the block correlation using the sample ID as the blocking factor. Doing so accounts for the fact that cells in scRNA-seq data, or spots in spatially-resolved transcriptomics data from Visium (or similar), that come from the same sample share a sample-level batch effect.

registration_block_cor(
  sce_pseudo,
  registration_model,
  var_sample_id = "registration_sample_id"
)

Arguments

sce_pseudo: The output of registration_pseudobulk().
registration_model: The output of registration_model().
var_sample_id: A character(1) naming the colData(sce_pseudo) variable that contains the sample ID.

Value

A numeric(1) with the block correlation at the sample ID level.

Examples

example("registration_model", package = "spatialLIBD")
#> 
#> rgstr_> example("registration_pseudobulk", package = "spatialLIBD")
#> 
#> rgstr_> ## Ensure reproducibility of example data
#> rgstr_> set.seed(20220907)
#> 
#> rgstr_> ## Generate example data
#> rgstr_> sce <- scuttle::mockSCE()
#> 
#> rgstr_> ## Add some sample IDs
#> rgstr_> sce$sample_id <- sample(LETTERS[1:5], ncol(sce), replace = TRUE)
#> 
#> rgstr_> ## Add a sample-level covariate: age
#> rgstr_> ages <- rnorm(5, mean = 20, sd = 4)
#> 
#> rgstr_> names(ages) <- LETTERS[1:5]
#> 
#> rgstr_> sce$age <- ages[sce$sample_id]
#> 
#> rgstr_> ## Add gene-level information
#> rgstr_> rowData(sce)$gene_id <- paste0("ENSG", seq_len(nrow(sce)))
#> 
#> rgstr_> rowData(sce)$gene_name <- paste0("gene", seq_len(nrow(sce)))
#> 
#> rgstr_> ## Pseudo-bulk by Cell Cycle
#> rgstr_> sce_pseudo <- registration_pseudobulk(
#> rgstr_+     sce,
#> rgstr_+     var_registration = "Cell_Cycle",
#> rgstr_+     var_sample_id = "sample_id",
#> rgstr_+     covars = c("age"),
#> rgstr_+     min_ncells = NULL
#> rgstr_+ )
#> 2026-03-27 00:06:54.935923 make pseudobulk object
#> 2026-03-27 00:06:55.08072 drop lowly expressed genes
#> 2026-03-27 00:06:55.144482 normalize expression
#> 
#> rgstr_> colData(sce_pseudo)
#> DataFrame with 20 rows and 9 columns
#>      Mutation_Status  Cell_Cycle   Treatment   sample_id       age
#>          <character> <character> <character> <character> <numeric>
#> A_G0              NA          G0          NA           A   19.1872
#> B_G0              NA          G0          NA           B   25.3496
#> C_G0              NA          G0          NA           C   24.1802
#> D_G0              NA          G0          NA           D   15.5211
#> E_G0              NA          G0          NA           E   20.9701
#> ...              ...         ...         ...         ...       ...
#> A_S               NA           S          NA           A   19.1872
#> B_S               NA           S          NA           B   25.3496
#> C_S               NA           S          NA           C   24.1802
#> D_S               NA           S          NA           D   15.5211
#> E_S               NA           S          NA           E   20.9701
#>      registration_variable registration_sample_id    ncells pseudo_sum_umi
#>                <character>            <character> <integer>      <numeric>
#> A_G0                    G0                      A         8        2946915
#> B_G0                    G0                      B        13        4922867
#> C_G0                    G0                      C         9        3398888
#> D_G0                    G0                      D         7        2630651
#> E_G0                    G0                      E        10        3761710
#> ...                    ...                    ...       ...            ...
#> A_S                      S                      A        12        4516334
#> B_S                      S                      B         8        2960685
#> C_S                      S                      C         7        2595774
#> D_S                      S                      D        14        5233560
#> E_S                      S                      E        11        4151818
#> 
#> rgstr_> rowData(sce_pseudo)
#> DataFrame with 2000 rows and 3 columns
#>               gene_id   gene_name        gene_search
#>           <character> <character>        <character>
#> Gene_0001       ENSG1       gene1       gene1; ENSG1
#> Gene_0002       ENSG2       gene2       gene2; ENSG2
#> Gene_0003       ENSG3       gene3       gene3; ENSG3
#> Gene_0004       ENSG4       gene4       gene4; ENSG4
#> Gene_0005       ENSG5       gene5       gene5; ENSG5
#> ...               ...         ...                ...
#> Gene_1996    ENSG1996    gene1996 gene1996; ENSG1996
#> Gene_1997    ENSG1997    gene1997 gene1997; ENSG1997
#> Gene_1998    ENSG1998    gene1998 gene1998; ENSG1998
#> Gene_1999    ENSG1999    gene1999 gene1999; ENSG1999
#> Gene_2000    ENSG2000    gene2000 gene2000; ENSG2000
#> 
#> rgstr_> registration_mod <- registration_model(sce_pseudo, "age")
#> 2026-03-27 00:06:55.250019 create model matrix
#> 
#> rgstr_> head(registration_mod)
#>      registration_variableG0 registration_variableG1 registration_variableG2M
#> A_G0                       1                       0                        0
#> B_G0                       1                       0                        0
#> C_G0                       1                       0                        0
#> D_G0                       1                       0                        0
#> E_G0                       1                       0                        0
#> A_G1                       0                       1                        0
#>      registration_variableS      age
#> A_G0                      0 19.18719
#> B_G0                      0 25.34965
#> C_G0                      0 24.18019
#> D_G0                      0 15.52107
#> E_G0                      0 20.97006
#> A_G1                      0 19.18719
block_cor <- registration_block_cor(sce_pseudo, registration_mod)
#> 2026-03-27 00:06:55.263826 run duplicateCorrelation()
#> 2026-03-27 00:06:56.576979 The estimated correlation is: -0.0187869166526901

Arguments

Value

See also

Examples