Spatial registration: model

This function defines the statistical model that will be used for computing the block correlation as well as the pairwise statistics. It is useful to inspect its output in case your sample-level covariates need to be cast to a different type. For example, an integer() variable might have to be cast into a factor() if you wish to model it as a categorical variable rather than a continuous one.

registration_model(
  sce_pseudo,
  covars = NULL,
  var_registration = "registration_variable"
)

Arguments

sce_pseudo: The output of registration_pseudobulk().
covars: A character() with names of sample-level covariates.
var_registration: A character(1) specifying the colData(sce_pseudo) variable of interest against which the relevant statistics will be computed.

Value

The output of model.matrix() which you can inspect to verify that your sample-level covariates are being properly modeled.

Examples

example("registration_pseudobulk", package = "spatialLIBD")
#> 
#> rgstr_> ## Ensure reproducibility of example data
#> rgstr_> set.seed(20220907)
#> 
#> rgstr_> ## Generate example data
#> rgstr_> sce <- scuttle::mockSCE()
#> 
#> rgstr_> ## Add some sample IDs
#> rgstr_> sce$sample_id <- sample(LETTERS[1:5], ncol(sce), replace = TRUE)
#> 
#> rgstr_> ## Add a sample-level covariate: age
#> rgstr_> ages <- rnorm(5, mean = 20, sd = 4)
#> 
#> rgstr_> names(ages) <- LETTERS[1:5]
#> 
#> rgstr_> sce$age <- ages[sce$sample_id]
#> 
#> rgstr_> ## Add gene-level information
#> rgstr_> rowData(sce)$gene_id <- paste0("ENSG", seq_len(nrow(sce)))
#> 
#> rgstr_> rowData(sce)$gene_name <- paste0("gene", seq_len(nrow(sce)))
#> 
#> rgstr_> ## Pseudo-bulk by Cell Cycle
#> rgstr_> sce_pseudo <- registration_pseudobulk(
#> rgstr_+     sce,
#> rgstr_+     var_registration = "Cell_Cycle",
#> rgstr_+     var_sample_id = "sample_id",
#> rgstr_+     covars = c("age"),
#> rgstr_+     min_ncells = NULL
#> rgstr_+ )
#> 2025-11-20 15:38:52.790313 make pseudobulk object
#> 2025-11-20 15:38:52.93436 drop lowly expressed genes
#> 2025-11-20 15:38:52.982242 normalize expression
#> 
#> rgstr_> colData(sce_pseudo)
#> DataFrame with 20 rows and 9 columns
#>      Mutation_Status  Cell_Cycle   Treatment   sample_id       age
#>          <character> <character> <character> <character> <numeric>
#> A_G0              NA          G0          NA           A   19.1872
#> B_G0              NA          G0          NA           B   25.3496
#> C_G0              NA          G0          NA           C   24.1802
#> D_G0              NA          G0          NA           D   15.5211
#> E_G0              NA          G0          NA           E   20.9701
#> ...              ...         ...         ...         ...       ...
#> A_S               NA           S          NA           A   19.1872
#> B_S               NA           S          NA           B   25.3496
#> C_S               NA           S          NA           C   24.1802
#> D_S               NA           S          NA           D   15.5211
#> E_S               NA           S          NA           E   20.9701
#>      registration_variable registration_sample_id    ncells pseudo_sum_umi
#>                <character>            <character> <integer>      <numeric>
#> A_G0                    G0                      A         8        2946915
#> B_G0                    G0                      B        13        4922867
#> C_G0                    G0                      C         9        3398888
#> D_G0                    G0                      D         7        2630651
#> E_G0                    G0                      E        10        3761710
#> ...                    ...                    ...       ...            ...
#> A_S                      S                      A        12        4516334
#> B_S                      S                      B         8        2960685
#> C_S                      S                      C         7        2595774
#> D_S                      S                      D        14        5233560
#> E_S                      S                      E        11        4151818
#> 
#> rgstr_> rowData(sce_pseudo)
#> DataFrame with 2000 rows and 3 columns
#>               gene_id   gene_name        gene_search
#>           <character> <character>        <character>
#> Gene_0001       ENSG1       gene1       gene1; ENSG1
#> Gene_0002       ENSG2       gene2       gene2; ENSG2
#> Gene_0003       ENSG3       gene3       gene3; ENSG3
#> Gene_0004       ENSG4       gene4       gene4; ENSG4
#> Gene_0005       ENSG5       gene5       gene5; ENSG5
#> ...               ...         ...                ...
#> Gene_1996    ENSG1996    gene1996 gene1996; ENSG1996
#> Gene_1997    ENSG1997    gene1997 gene1997; ENSG1997
#> Gene_1998    ENSG1998    gene1998 gene1998; ENSG1998
#> Gene_1999    ENSG1999    gene1999 gene1999; ENSG1999
#> Gene_2000    ENSG2000    gene2000 gene2000; ENSG2000
registration_mod <- registration_model(sce_pseudo, "age")
#> 2025-11-20 15:38:53.082108 create model matrix
head(registration_mod)
#>      registration_variableG0 registration_variableG1 registration_variableG2M
#> A_G0                       1                       0                        0
#> B_G0                       1                       0                        0
#> C_G0                       1                       0                        0
#> D_G0                       1                       0                        0
#> E_G0                       1                       0                        0
#> A_G1                       0                       1                        0
#>      registration_variableS      age
#> A_G0                      0 19.18719
#> B_G0                      0 25.34965
#> C_G0                      0 24.18019
#> D_G0                      0 15.52107
#> E_G0                      0 20.97006
#> A_G1                      0 19.18719

Arguments

Value

See also

Examples