library(brainstorm)
library(VariantAnnotation)
#> Loading required package: BiocGenerics
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#> dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#> grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#> order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#> rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#> union, unique, unsplit, which.max, which.min
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#>
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#>
#> colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#> colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#> colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#> colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#> colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#> colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#> colWeightedMeans, colWeightedMedians, colWeightedSds,
#> colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#> rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#> rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#> rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#> rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#> rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#> rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#> rowWeightedSds, rowWeightedVars
#> Loading required package: GenomeInfoDb
#> Loading required package: S4Vectors
#> Loading required package: stats4
#>
#> Attaching package: 'S4Vectors'
#> The following objects are masked from 'package:base':
#>
#> expand.grid, I, unname
#> Loading required package: IRanges
#> Loading required package: GenomicRanges
#> Loading required package: SummarizedExperiment
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#>
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#>
#> rowMedians
#> The following objects are masked from 'package:matrixStats':
#>
#> anyMissing, rowMedians
#> Loading required package: Rsamtools
#> Loading required package: Biostrings
#> Loading required package: XVector
#>
#> Attaching package: 'Biostrings'
#> The following object is masked from 'package:base':
#>
#> strsplit
#>
#> Attaching package: 'VariantAnnotation'
#> The following object is masked from 'package:base':
#>
#> tabulate
library(here)
#> here() starts at /__w/brainstorm/brainstorm
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:VariantAnnotation':
#>
#> select
#> The following objects are masked from 'package:Biostrings':
#>
#> collapse, intersect, setdiff, setequal, union
#> The following object is masked from 'package:XVector':
#>
#> slice
#> The following object is masked from 'package:Biobase':
#>
#> combine
#> The following objects are masked from 'package:GenomicRanges':
#>
#> intersect, setdiff, union
#> The following object is masked from 'package:GenomeInfoDb':
#>
#> intersect
#> The following objects are masked from 'package:IRanges':
#>
#> collapse, desc, intersect, setdiff, slice, union
#> The following objects are masked from 'package:S4Vectors':
#>
#> first, intersect, rename, setdiff, setequal, union
#> The following object is masked from 'package:matrixStats':
#>
#> count
#> The following objects are masked from 'package:BiocGenerics':
#>
#> combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
library(purrr)
#>
#> Attaching package: 'purrr'
#> The following object is masked from 'package:XVector':
#>
#> compact
#> The following object is masked from 'package:GenomicRanges':
#>
#> reduce
#> The following object is masked from 'package:IRanges':
#>
#> reduce
library(pheatmap)
#Prep Data ## Prep DNA data
snpsGeno <- make_snpsGeno(snpsGeno_VCF)
snpsGeno[1:5, 1:5]
# Make sure colnames are all IDs in pd table
all(colnames(snpsCalled_VCF) %in% pd_example$SAMPLE_ID)
colnames(snpsCalled_VCF) <- ss(colnames(snpsCalled_VCF),"_accepted")
all(colnames(snpsCalled_VCF) %in% pd_example$SAMPLE_ID)
dim(snpsCalled_VCF)
snpsCalled_filter <- filter_called(snpsCalled_VCF)
dim(snpsCalled_filter)
snpsRNA <- make_snpsRNA(snpsGeno_VCF, snpsCalled_filter)
print("snpsGeno2: Matching Genotype snps")
snpsRNA$snpsGeno2[1:5, 1:5]
print("snpsCalled: snpsCalled_VCF RNA snps")
snpsRNA$snpsCalled[1:5, 1:5]
No high correlation between samples.
basic_cor <- cor(snpsGeno, use = "pairwise.comp")
pheatmap(
basic_cor,
cluster_rows = FALSE,
show_rownames = FALSE,
cluster_cols = FALSE,
show_colnames = FALSE
)
all(colnames(snpsGeno) %in% brain_sentrix$ID)
corLong_dna_dna <- make_corLong(snpsGeno, BrainTable1 = brain_sentrix)
head(corLong_dna_dna)
But samples from the same brain have low correlation.
basic_cor <- cor(snpsRNA$snpsCalled, use = "pairwise.comp")
pheatmap(
basic_cor,
cluster_rows = FALSE,
show_rownames = FALSE,
cluster_cols = FALSE,
show_colnames = FALSE
)
pd_simple <- pd_example[,c("SAMPLE_ID", "RNum", "BrNum", "BrainRegion")]
corLong_rna_rna <- make_corLong(
snps1 = snpsRNA$snpsCalled,
BrainTable1 = pd_simple,
ID_col1 = "SAMPLE_ID"
)
head(corLong_rna_rna)
corLong_rna_rna %>%
# filter(row_BrNum == col_BrNum) %>%
ggplot(aes(x = cor)) +
geom_density() +
geom_vline(xintercept = 0.59, color = "red", linetype = "dashed")
corLong_dna_rna <- make_corLong(
snps1 = snpsRNA$snpsGeno2,
snps2 = snpsRNA$snpsCalled,
BrainTable1 = brain_sentrix,
BrainTable2 = pd_simple,
ID_col1 = "ID",
ID_col2 = "SAMPLE_ID"
)
head(corLong_dna_rna)
corLong_dna_rna %>%
filter(row_BrNum == col_BrNum) %>%
ggplot(aes(x = cor)) +
geom_density() +
geom_vline(xintercept = 0.59, color = "red", linetype = "dashed")
dna_dna_groups <- grouper(corLong_dna_dna)
length(dna_dna_groups)
table(unlist(purrr::map_int(dna_dna_groups, "nBrNum")))
rna_rna_groups <- grouper(corLong_rna_rna)
length(rna_rna_groups)
message("How many samples in each group?")
table(purrr::map_int(rna_rna_groups, "n"))
message("How many Brains in each group?")
table(purrr::map_int(rna_rna_groups, "nBrNum"))
dna_rna_groups <- grouper(corLong_dna_rna)
message("How many groups?")
length(dna_rna_groups)
message("How many samples in each group?")
table(purrr::map_int(dna_rna_groups, "n"))
message("How many Brains in each group?")
table(purrr::map_int(dna_rna_groups, "nBrNum"))
multi_samples <- keep(rna_rna_groups, ~ .x$n > 1)
sessioninfo::session_info()