library(brainstorm)
library(VariantAnnotation)
#> Loading required package: BiocGenerics
#> 
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:stats':
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#> 
#>     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#>     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#>     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#>     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#>     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#>     union, unique, unsplit, which.max, which.min
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#> 
#> Attaching package: 'MatrixGenerics'
#> The following objects are masked from 'package:matrixStats':
#> 
#>     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#>     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#>     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#>     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#>     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#>     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#>     colWeightedMeans, colWeightedMedians, colWeightedSds,
#>     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#>     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#>     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#>     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#>     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#>     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#>     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#>     rowWeightedSds, rowWeightedVars
#> Loading required package: GenomeInfoDb
#> Loading required package: S4Vectors
#> Loading required package: stats4
#> 
#> Attaching package: 'S4Vectors'
#> The following objects are masked from 'package:base':
#> 
#>     expand.grid, I, unname
#> Loading required package: IRanges
#> Loading required package: GenomicRanges
#> Loading required package: SummarizedExperiment
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> 
#> Attaching package: 'Biobase'
#> The following object is masked from 'package:MatrixGenerics':
#> 
#>     rowMedians
#> The following objects are masked from 'package:matrixStats':
#> 
#>     anyMissing, rowMedians
#> Loading required package: Rsamtools
#> Loading required package: Biostrings
#> Loading required package: XVector
#> 
#> Attaching package: 'Biostrings'
#> The following object is masked from 'package:base':
#> 
#>     strsplit
#> 
#> Attaching package: 'VariantAnnotation'
#> The following object is masked from 'package:base':
#> 
#>     tabulate
library(here)
#> here() starts at /__w/brainstorm/brainstorm
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following object is masked from 'package:VariantAnnotation':
#> 
#>     select
#> The following objects are masked from 'package:Biostrings':
#> 
#>     collapse, intersect, setdiff, setequal, union
#> The following object is masked from 'package:XVector':
#> 
#>     slice
#> The following object is masked from 'package:Biobase':
#> 
#>     combine
#> The following objects are masked from 'package:GenomicRanges':
#> 
#>     intersect, setdiff, union
#> The following object is masked from 'package:GenomeInfoDb':
#> 
#>     intersect
#> The following objects are masked from 'package:IRanges':
#> 
#>     collapse, desc, intersect, setdiff, slice, union
#> The following objects are masked from 'package:S4Vectors':
#> 
#>     first, intersect, rename, setdiff, setequal, union
#> The following object is masked from 'package:matrixStats':
#> 
#>     count
#> The following objects are masked from 'package:BiocGenerics':
#> 
#>     combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
library(purrr)
#> 
#> Attaching package: 'purrr'
#> The following object is masked from 'package:XVector':
#> 
#>     compact
#> The following object is masked from 'package:GenomicRanges':
#> 
#>     reduce
#> The following object is masked from 'package:IRanges':
#> 
#>     reduce
library(pheatmap)

#Prep Data ## Prep DNA data

snpsGeno <- make_snpsGeno(snpsGeno_VCF)
snpsGeno[1:5, 1:5]

Prep RNA data

Filter “snps snpsCalled_VCF” data

# Make sure colnames are all IDs in pd table
all(colnames(snpsCalled_VCF)  %in% pd_example$SAMPLE_ID)
colnames(snpsCalled_VCF) <- ss(colnames(snpsCalled_VCF),"_accepted")
all(colnames(snpsCalled_VCF)  %in% pd_example$SAMPLE_ID)

dim(snpsCalled_VCF)
snpsCalled_filter <- filter_called(snpsCalled_VCF)
dim(snpsCalled_filter)

Create snpsGeno2 and snpssnpsCalled_VCF

snpsRNA <- make_snpsRNA(snpsGeno_VCF, snpsCalled_filter)
print("snpsGeno2: Matching Genotype snps")
snpsRNA$snpsGeno2[1:5, 1:5]
print("snpsCalled: snpsCalled_VCF RNA snps")
snpsRNA$snpsCalled[1:5, 1:5]

Build Correlation Tables

DNA vs. DNA

No high correlation between samples.

basic_cor <- cor(snpsGeno, use = "pairwise.comp")

pheatmap(
    basic_cor,
    cluster_rows = FALSE,
    show_rownames = FALSE,
    cluster_cols = FALSE,
    show_colnames = FALSE
)
all(colnames(snpsGeno) %in% brain_sentrix$ID)

corLong_dna_dna <- make_corLong(snpsGeno, BrainTable1 = brain_sentrix)
head(corLong_dna_dna)

But samples from the same brain have low correlation.

corLong_dna_dna %>%
    filter(row_BrNum == col_BrNum) 

RNA vs. RNA

basic_cor <- cor(snpsRNA$snpsCalled, use = "pairwise.comp")

pheatmap(
    basic_cor,
    cluster_rows = FALSE,
    show_rownames = FALSE,
    cluster_cols = FALSE,
    show_colnames = FALSE
)
pd_simple <- pd_example[,c("SAMPLE_ID", "RNum", "BrNum", "BrainRegion")]

corLong_rna_rna <- make_corLong(
    snps1 = snpsRNA$snpsCalled,
    BrainTable1 = pd_simple,
    ID_col1 = "SAMPLE_ID"
)
head(corLong_rna_rna)
corLong_rna_rna %>%
    # filter(row_BrNum == col_BrNum) %>%
    ggplot(aes(x = cor)) +
    geom_density() +
    geom_vline(xintercept = 0.59, color = "red", linetype = "dashed")

DNA vs. RNA

corLong_dna_rna <- make_corLong(
    snps1 = snpsRNA$snpsGeno2,
    snps2 = snpsRNA$snpsCalled,
    BrainTable1 = brain_sentrix,
    BrainTable2 = pd_simple,
    ID_col1 = "ID",
    ID_col2 = "SAMPLE_ID"
)

head(corLong_dna_rna)
corLong_dna_rna %>%
    filter(row_BrNum == col_BrNum) %>%
    ggplot(aes(x = cor)) +
    geom_density() +
    geom_vline(xintercept = 0.59, color = "red", linetype = "dashed")

Run Grouper

dna_dna_groups <- grouper(corLong_dna_dna)
length(dna_dna_groups)
table(unlist(purrr::map_int(dna_dna_groups, "nBrNum")))

Find problem groups & samples

dna_multi_br <- keep(dna_dna_groups, ~ .x$nBrNum > 1)
length(dna_multi_br)
rna_rna_groups <- grouper(corLong_rna_rna)
length(rna_rna_groups)

message("How many samples in each group?")
table(purrr::map_int(rna_rna_groups, "n"))

message("How many Brains in each group?")
table(purrr::map_int(rna_rna_groups, "nBrNum"))
dna_rna_groups <- grouper(corLong_dna_rna)
message("How many groups?")
length(dna_rna_groups)

message("How many samples in each group?")
table(purrr::map_int(dna_rna_groups, "n"))

message("How many Brains in each group?")
table(purrr::map_int(dna_rna_groups, "nBrNum"))
multi_samples <- keep(rna_rna_groups, ~ .x$n > 1)

Session Info

sessioninfo::session_info()