This document contains the code that creates the recount_brain table by merging all the curated SRA metadata tables created by Ashkaun Razmara.
First, we need some packages.
library('devtools')
library('recount')
library('knitcitations')
library('BiocStyle')
Next we set up the citation information.
## Start from an empty bibliography and configure how citations are rendered
cleanbib()
cite_options(
    hyperlink = 'to.doc',
    citation_format = 'text',
    style = 'html'
)
# Note links won't show for now due to the following issue
# https://github.com/cboettig/knitcitations/issues/63

## Collect the references used in this document, one named entry per source.
## For packages that provide several citation entries a single one is selected
## by position.
bib <- c(
    R = citation(),
    BiocStyle = citation('BiocStyle'),
    devtools = citation('devtools'),
    knitcitations = citation('knitcitations'),
    knitr = citation('knitr')[3],
    recount = citation('recount')[1],
    rmarkdown = citation('rmarkdown')[1]
)

## Write the bibliography to a BibTeX file
write.bibtex(bib, file = 'merging_data.bib')
This code finds all the SRA metadata tables and reads them into R.
## Find the curated SRA metadata tables.
## The dot in the pattern is escaped so that only file names ending in the
## literal extension '.csv' are matched (an unescaped '.' matches any
## character).
table_files <- dir('../SRA_metadata', pattern = '\\.csv$', full.names = TRUE)

## Name each file by its project id: the second ', '-separated piece of the
## path, truncated at the first '-Table'. vapply() guarantees a character
## vector, also when no files are found.
names(table_files) <- vapply(strsplit(table_files, ', '), function(x) {
    strsplit(x[2], '-Table')[[1]][1]
}, character(1))

## Read the table files, keeping strings as character vectors and treating
## both 'NA' and empty cells as missing values
table_content <- lapply(table_files, read.csv, header = TRUE,
    stringsAsFactors = FALSE, na.strings = c('NA', ''))
Having read the tables, we can check how frequently the variables that we will ignore in recount_brain appear. Most of these variables are rare and present in just a handful of studies. Others, like Sequencing.Kit, are frequently present as column names but are mostly made up of NAs due to how we constructed the SRA metadata tables.
## Define the main variables we want from all studies.
## The order of this vector is preserved: first the names ending in '_s'/'_l',
## then the dot-separated ones.
main_variables <- c(
    c(
        'Center_Name_s', 'Library_Name_s', 'AvgSpotLen_l', 'BioSample_s',
        'Experiment_s', 'MBases_l', 'MBytes_l', 'Run_s', 'SRA_Sample_s',
        'Sample_Name_s', 'BioProject_s', 'Consent_s', 'InsertSize_l',
        'Instrument_s', 'LibraryLayout_s', 'LibrarySelection_s',
        'LibrarySource_s', 'LoadDate_s', 'Platform_s', 'ReleaseDate_s',
        'SRA_Study_s', 'Assay_Type_s', 'Organism_s'
    ),
    c(
        'Brain.Bank', 'Sex', 'Age.Units', 'Age', 'Development', 'Race',
        'Sample.Origin', 'Cell.Line', 'Tissue.Site.1', 'Tissue.Site.2',
        'Tissue.Site.3', 'Brodmann.Area', 'Hemisphere', 'Disease.Status',
        'Disease', 'Tumor.Type', 'Clinical.Stage.1', 'Clinical.Stage.2',
        'Pathology', 'Viability', 'Preparation', 'PMI', 'PMI.Units', 'RIN'
    )
)
## For every table, split its lower-cased column names into the ones that
## belong to our main set ('present') and the extra ones that we will ignore
## ('tobeignored'). The comparison is case-insensitive.
check_vars <- lapply(table_content, function(tab) {
    cols_lower <- tolower(colnames(tab))
    is_main <- cols_lower %in% tolower(main_variables)
    list(present = cols_lower[is_main], tobeignored = cols_lower[!is_main])
})

## Variables that will be ignored, counted across studies and sorted from the
## most to the least frequent
sort(table(unlist(lapply(check_vars, '[[', 'tobeignored'))), decreasing = TRUE)
##
## x sequencing.kit
## 62 61
## x.1 x.2
## 35 26
## source_name_s tissue_s
## 24 18
## x.3 biomaterial_provider_s
## 11 10
## biosamplemodel_s isolate_s
## 10 9
## x.4 cell_type_s
## 8 6
## assemblyname_s organism
## 5 4
## assay.type disease_state_s
## 3 3
## molecule_subtype_s barcode_s
## 3 2
## cell_line_s clip_antibody_s
## 2 2
## disease_s genotype_s
## 2 2
## health_state_s label_s
## 2 2
## librarypreparation_s sample_id_s
## 2 2
## strain_s tissue_type_s
## 2 2
## age_of_onset_s apoe_genotype_s
## 1 1
## base.gb. base.gb..1
## 1 1
## base.gb..2 braak_stage_s
## 1 1
## brain_region_s cag_s
## 1 1
## cell_subtype_s derived_cell_typed_s
## 1 1
## development_stage_s diagnosis_s
## 1 1
## disease_status_s duration_s
## 1 1
## expression_construct_s facs_population_s
## 1 1
## flowcell_s fraction_s
## 1 1
## genotype_variation_s h_v_cortical_score_s
## 1 1
## h_v_striatal_score_s histology
## 1 1
## identifier_s idh1.mutation
## 1 1
## individual_s karyotype_s
## 1 1
## lane_s library_id_s
## 1 1
## lithium_treated_s met
## 1 1
## microarray_study_s mrna_seq_reads_s
## 1 1
## organism_part_s origin
## 1 1
## pathology_occurrence_s pathology_s
## 1 1
## proteomics_study_s proteomics_sv1_s
## 1 1
## proteomics_sv2_s proteomics_sv3_s
## 1 1
## race_s rate...
## 1 1
## rate....1 region_s
## 1 1
## sample_comment_s sample_group_s
## 1 1
## sample_type_s sampleid
## 1 1
## seqc_sample_s site_s
## 1 1
## slc26a11 specimen_with_known_storage_state_s
## 1 1
## stage_s subject_group_s
## 1 1
## subject_id_s tacc3
## 1 1
## tissue_compartment_s transduced_with_s
## 1 1
## treatment_s tumor_stage_s
## 1 1
## vonsattel_grade_s x.5
## 1 1
## Check that we are not missing any due to spelling issues: none of the
## ignored (lower-cased) names may equal a lower-cased main variable name.
## NOTE(review): 'tobeignored' is built as the lower-cased names that are NOT
## in tolower(main_variables), so this condition holds by construction and
## will not detect spelling variants — confirm whether a stricter check was
## intended.
stopifnot(!any(
    names(table(unlist(sapply(check_vars, '[[', 'tobeignored')))) %in%
        tolower(main_variables)
))
Next we can keep just the main variables of interest.
## Keep only the main variables.
## `drop = FALSE` keeps the result a data.frame even when a table has a single
## matching column; without it the subset collapses to a bare vector, which
## has no colnames()/nrow() for the steps that follow.
## Note: this match is case-sensitive, unlike the lower-cased comparison used
## when checking which variables are present or ignored.
table_content <- lapply(table_content, function(tab) {
    tab[, colnames(tab) %in% main_variables, drop = FALSE]
})
We can then compute some summary statistics per variable for each study, like what is the percent of missing observations.
## Per study, summarise every retained column: number and percent of missing
## values, and number and percent of observed values. The result has one row
## per (column, study) pair; the per-study data frames are stacked with rbind().
table_cols <- do.call(rbind, lapply(seq_along(table_content), function(idx) {
    study_tab <- table_content[[idx]]
    n_missing <- sapply(study_tab, function(column) {
        sum(is.na(column))
    })
    stats <- data.frame(
        columns = colnames(study_tab),
        number_na = n_missing,
        n = nrow(study_tab),
        study = names(table_content)[idx],
        stringsAsFactors = FALSE
    )
    stats$percent_na <- stats$number_na / stats$n * 100
    stats$number_obs <- stats$n - stats$number_na
    stats$percent_obs <- 100 - stats$percent_na
    ## Reset the row names to the default sequence
    rownames(stats) <- NULL
    stats
}))
## Explore the summary statistics
dim(table_cols)
## [1] 2856 7
summary(table_cols)
## columns number_na n study
## Length:2856 Min. : 0.00 Min. : 4.00 Length:2856
## Class :character 1st Qu.: 0.00 1st Qu.: 8.00 Class :character
## Mode :character Median : 0.00 Median : 15.00 Mode :character
## Mean : 30.35 Mean : 71.41
## 3rd Qu.: 8.00 3rd Qu.: 25.00
## Max. :2898.00 Max. :2898.00
## percent_na number_obs percent_obs
## Min. : 0.0 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.: 0.0
## Median : 0.0 Median : 8.00 Median :100.0
## Mean : 35.1 Mean : 41.06 Mean : 64.9
## 3rd Qu.:100.0 3rd Qu.: 18.00 3rd Qu.:100.0
## Max. :100.0 Max. :2898.00 Max. :100.0
## Most commonly observed variables across all studies
sort(table(table_cols$columns), decreasing = TRUE)
##
## Age Age.Units AvgSpotLen_l BioSample_s
## 62 62 62 62
## Brain.Bank Brodmann.Area Cell.Line Clinical.Stage.1
## 62 62 62 62
## Clinical.Stage.2 Consent_s Development Disease
## 62 62 62 62
## Disease.Status Experiment_s Hemisphere InsertSize_l
## 62 62 62 62
## Instrument_s LibraryLayout_s LibrarySelection_s LibrarySource_s
## 62 62 62 62
## LoadDate_s MBases_l MBytes_l Pathology
## 62 62 62 62
## Platform_s PMI PMI.Units Preparation
## 62 62 62 62
## Race ReleaseDate_s RIN Run_s
## 62 62 62 62
## Sample.Origin Sex SRA_Sample_s SRA_Study_s
## 62 62 62 62
## Tissue.Site.1 Tissue.Site.2 Tissue.Site.3 Tumor.Type
## 62 62 62 62
## Viability BioProject_s Center_Name_s Sample_Name_s
## 62 61 60 60
## Assay_Type_s Organism_s Library_Name_s
## 59 58 16
## now in percent
round(sort(table(table_cols$columns), decreasing = TRUE) /
length(table_content) * 100, 2)
##
## Age Age.Units AvgSpotLen_l BioSample_s
## 100.00 100.00 100.00 100.00
## Brain.Bank Brodmann.Area Cell.Line Clinical.Stage.1
## 100.00 100.00 100.00 100.00
## Clinical.Stage.2 Consent_s Development Disease
## 100.00 100.00 100.00 100.00
## Disease.Status Experiment_s Hemisphere InsertSize_l
## 100.00 100.00 100.00 100.00
## Instrument_s LibraryLayout_s LibrarySelection_s LibrarySource_s
## 100.00 100.00 100.00 100.00
## LoadDate_s MBases_l MBytes_l Pathology
## 100.00 100.00 100.00 100.00
## Platform_s PMI PMI.Units Preparation
## 100.00 100.00 100.00 100.00
## Race ReleaseDate_s RIN Run_s
## 100.00 100.00 100.00 100.00
## Sample.Origin Sex SRA_Sample_s SRA_Study_s
## 100.00 100.00 100.00 100.00
## Tissue.Site.1 Tissue.Site.2 Tissue.Site.3 Tumor.Type
## 100.00 100.00 100.00 100.00
## Viability BioProject_s Center_Name_s Sample_Name_s
## 100.00 98.39 96.77 96.77
## Assay_Type_s Organism_s Library_Name_s
## 95.16 93.55 25.81
## Example of how we can explore the percent NA for the Age variable
#boxplot(percent_na ~ columns, data = table_cols, las = 2)
#hist(subset(table_cols, columns == 'Age')$percent_na)
table(subset(table_cols, columns == 'Age')$percent_na)
##
## 0 0.72992700729927 4.08163265306122 100
## 28 1 1 32
#summary(subset(table_cols, columns == 'Age')$percent_na)
Having read and filtered the tables, we can now proceed to merging them. This is how we create the recount_brain table.
## Get the unique columns
unique_cols <- unique(table_cols$columns)

## Lower-cased version used for all comparisons below. The column names of
## every table are lower-cased before the comparison, so comparing against the
## mixed-case names would flag every column as missing from every table.
unique_cols_lower <- unique(tolower(unique_cols))

## Sort by percent NA: columns ordered by increasing mean percent of missing
## values, where the mean is taken over the studies that have the column
unique_cols_sorted <- tolower(names(sort(tapply(table_cols$percent_na,
    table_cols$columns, mean))))

## Build a new set of tables by study with all the columns.
## Columns a study lacks are added filled with NA, and every table is returned
## with the same column order (unique_cols_sorted), whether or not anything
## had to be added.
table_new <- lapply(table_content, function(tab) {
    colnames(tab) <- tolower(colnames(tab))
    missing_cols <- setdiff(unique_cols_lower, colnames(tab))
    if (length(missing_cols) > 0) {
        df_add <- as.data.frame(matrix(NA, nrow = nrow(tab),
            ncol = length(missing_cols)))
        colnames(df_add) <- missing_cols
        tab <- cbind(tab, df_add)
    }
    tab[, match(unique_cols_sorted, colnames(tab)), drop = FALSE]
})

## Finally merge them all
recount_brain <- do.call(rbind, table_new)
Now that we have the recount_brain table, we can check if the samples are present in recount (Collado-Torres, Nellore, Kammers, Ellis, et al., 2017) and save the data, which can then be accessed easily thanks to recount::add_metadata('recount_brain').
## Check which samples are in recount
m <- all_metadata('sra')
## 2020-11-13 16:24:06 downloading the metadata to /tmp/RtmpK9pZcs/metadata_clean_sra.Rdata
map <- match(m$run, recount_brain$run_s)
table(!is.na(map))
##
## FALSE TRUE
## 46885 3214
map_r <- match(recount_brain$run_s, m$run)
table(!is.na(map_r))
##
## FALSE TRUE
## 1217 3214
## Flag the recount_brain samples whose run was matched in the recount metadata
recount_brain$present_in_recount <- !is.na(map_r)

## Sort alphabetically and change dots for underscores
#recount_brain <- recount_brain[, sort(colnames(recount_brain))]
colnames(recount_brain) <- gsub('\\.', '_', colnames(recount_brain))

## Fix some issues: lower-case the sex labels, then strip trailing whitespace
## from every character column that has any
recount_brain$sex <- tolower(recount_brain$sex)
char_cols <- which(sapply(recount_brain, class) == 'character')
recount_brain[char_cols] <- lapply(recount_brain[char_cols], function(values) {
    if (any(grepl('[[:space:]]+$', values))) {
        gsub('[[:space:]]+$', '', values)
    } else {
        values
    }
})
## Final dimensions of recount_brain
dim(recount_brain)
## [1] 4431 48
## Save the data as an R object and as a CSV file
save(recount_brain, file = 'recount_brain_v1.Rdata')
write.csv(recount_brain, file = 'recount_brain_v1.csv', quote = TRUE,
    row.names = FALSE)

## Check md5sum for the resulting files.
## tools::md5sum() is vectorised over file paths and names each checksum by
## its file, so it is called directly: wrapping it in sapply() pasted the file
## name twice into every name of the result.
tools::md5sum(dir(pattern = 'recount_brain_v1'))
## recount_brain_v1.csv.recount_brain_v1.csv
## "9e6bb5d1ce8e58e951b3afc5262a31d2"
## recount_brain_v1.Rdata.recount_brain_v1.Rdata
## "1ba96e3551072b64dd4ad3a182dc7f95"
## List of all variables
colnames(recount_brain)
## [1] "assay_type_s" "avgspotlen_l" "bioproject_s"
## [4] "biosample_s" "center_name_s" "consent_s"
## [7] "disease_status" "experiment_s" "insertsize_l"
## [10] "instrument_s" "librarylayout_s" "libraryselection_s"
## [13] "librarysource_s" "loaddate_s" "mbases_l"
## [16] "mbytes_l" "organism_s" "platform_s"
## [19] "releasedate_s" "run_s" "sample_name_s"
## [22] "sra_sample_s" "sra_study_s" "library_name_s"
## [25] "sample_origin" "development" "tissue_site_1"
## [28] "sex" "age_units" "age"
## [31] "tissue_site_2" "disease" "tissue_site_3"
## [34] "brain_bank" "preparation" "viability"
## [37] "cell_line" "pmi_units" "pmi"
## [40] "brodmann_area" "clinical_stage_1" "rin"
## [43] "race" "tumor_type" "pathology"
## [46] "clinical_stage_2" "hemisphere" "present_in_recount"
Not all the samples in recount_brain are present in recount. The following code explores why some samples are missing in recount.
## Number missing/present in recount
table(is.na(map_r))
##
## FALSE TRUE
## 3214 1217
## For the missing ones, what is their organism?
table(recount_brain$organism_s[ is.na(map_r) ], useNA = 'ifany')
##
## Homo sapiens synthetic construct
## 811 406
## For the missing ones, check organism and platform
table(recount_brain$organism_s[ is.na(map_r) ],
recount_brain$platform_s[ is.na(map_r) ], useNA = 'ifany')
##
## ABI_SOLID ILLUMINA LS454 PACBIO_SMRT
## Homo sapiens 760 33 12 6
## synthetic construct 154 252 0 0
## For the missing ones, check sra_study and platform
table(recount_brain$sra_study_s[ is.na(map_r) ],
recount_brain$platform_s[ is.na(map_r) ], useNA = 'ifany')
##
## ABI_SOLID ILLUMINA LS454 PACBIO_SMRT
## SRP025982 914 252 12 0
## SRP031868 0 2 0 0
## SRP049776 0 11 0 6
## SRP055730 0 20 0 0
## For all the ones in recount_brain, check assay type and assay_type_s
## NOTE(review): the column names listed for recount_brain include
## 'assay_type_s' but no 'assay_type' column, so `$assay_type` resolves through
## the partial matching of `$` to 'assay_type_s' and this tabulates the column
## against itself (hence a purely diagonal table) — confirm the intent.
table(recount_brain$assay_type_s, recount_brain$assay_type, useNA = 'ifany')
##
## RNA-Seq Synthetic-Long-Read WXS <NA>
## RNA-Seq 4364 0 0 0
## Synthetic-Long-Read 0 11 0 0
## WXS 0 0 20 0
## <NA> 0 0 0 36
## For the missing ones, check assay_type_s
table(recount_brain$assay_type_s[ is.na(map_r) ], useNA = 'ifany')
##
## RNA-Seq Synthetic-Long-Read WXS
## 1186 11 20
recount_brain
This section shows examples of how one can explore the data in recount_brain
. It’s up to the user to keep exploring the samples to identify questions of their interest and/or relevant studies.
## Number of samples by disease
table(recount_brain$disease, useNA = 'ifany')
##
## Alzheimer’s disease
## 24
## Amyotrophic lateral sclerosis
## 79
## Angelman syndrome
## 2
## Autism spectrum disorder
## 12
## Bipolar disorder
## 57
## Brain tumor
## 49
## Cortical ischemic stroke tissue
## 7
## Dup15q syndrome
## 2
## Embryonal tumors with multilayered rosettes
## 11
## Epilepsy
## 18
## Huntington's disease
## 32
## Hutchinson-Gilford progeria syndrome
## 4
## Parkinson's disease
## 17
## Parkinson’s disease
## 29
## Primitive neuroectodermal tumor
## 3
## Rett syndrome
## 3
## Schizophrenia
## 19
## Spinal muscular atrophy
## 4
## Tumor
## 411
## ZNF804A Knockdown
## 4
## <NA>
## 3644
## Number of samples per age unit
table(recount_brain$age_units, useNA = 'ifany')
##
## Days Months Post Conception Weeks
## 2 45 12
## Weeks Years <NA>
## 16 919 3437
## Development stage vs presence in recount
table('Development stage' = recount_brain$development,
'Present in recount' = recount_brain$present_in_recount, useNA = 'ifany')
## Present in recount
## Development stage FALSE TRUE
## Adolescent 0 35
## Adult 20 943
## Child 0 58
## Fetus 0 38
## Infant 2 45
## <NA> 1195 2095
## Age by age units for all samples in recount_brain and then those
## also present in recount
par(mar = c(10, 4, 4, 2) + 0.1)
boxplot(age ~ age_units, data = recount_brain, las = 2,
main = 'all recount_brain samples')
boxplot(age ~ age_units, main = 'only samples present in recount',
data = recount_brain[recount_brain$present_in_recount, ], las = 2)
## Get the number of samples for each boxplot
table(recount_brain$age_units[recount_brain$present_in_recount])
##
## Days Months Post Conception Weeks
## 2 43 12
## Weeks Years
## 16 899
Here is an example where we explore how many samples have the post mortem interval (PMI) information registered and the relationship with age for those that have age measured in years.
## How many samples have a pmi unit?
table('PMI units' = recount_brain$pmi_units,
'Present in recount' = recount_brain$present_in_recount, useNA = 'ifany')
## Present in recount
## PMI units FALSE TRUE
## Hours 0 283
## <NA> 1217 2931
## How many samples have a pmi value?
table(!is.na(recount_brain$pmi))
##
## FALSE TRUE
## 4149 282
## Check PMI overall
boxplot(recount_brain$pmi, ylab = 'PMI (hours)')
## Compare PMI vs age (in years) for the samples present in recount.
## `%in%` is used instead of `==` so that samples with a missing age unit are
## excluded (FALSE) instead of producing NA row indices, which would add
## all-NA rows to the subset.
plot(age ~ pmi, data = recount_brain[recount_brain$present_in_recount &
    recount_brain$age_units %in% 'Years', ], ylab = 'Age (years)',
    xlab = 'PMI (hours)')
Next we can explore the relationship between sex and other variables such as age and disease status.
## Number of observations with sex recorded
table(recount_brain$sex[recount_brain$present_in_recount], useNA = 'ifany')
##
## female male pooled <NA>
## 251 681 1743 539
## Samples present in recount that have their age measured in years.
## `%in%` is used instead of `==` so that samples with a missing age unit are
## excluded (FALSE) instead of producing NA row indices, which would add
## all-NA rows to the subset. The subset is computed once and shared by the
## three plots below.
recount_years <- recount_brain[recount_brain$present_in_recount &
    recount_brain$age_units %in% 'Years', ]

## Age vs sex for those that have age measured in years
boxplot(age ~ sex, data = recount_years, ylab = 'Age (years)')

## Check age vs disease status
boxplot(age ~ disease_status, data = recount_years, ylab = 'Age (years)')

## Check age disease status and sex
boxplot(age ~ disease_status + sex, data = recount_years,
    ylab = 'Age (years)')
## Age vs the actual disease
## Widen the bottom margin so the long disease labels fit
par(mar = c(18, 4, 4, 2) + 0.1)

## Samples present in recount with age measured in years. `%in%` is used
## instead of `==` because age_units contains missing values: with `==` those
## samples become NA indices, and every NA index adds an NA entry to the
## subset. In the table below that inflates the `<NA>` count with samples
## whose age unit is missing, on top of the ones that truly lack a disease
## label. (The `<NA>` count of 2657 in the previously rendered output was
## produced with `==` and appears to include them — re-render to refresh.)
is_recount_years <- recount_brain$present_in_recount &
    recount_brain$age_units %in% 'Years'

boxplot(age ~ disease, data = recount_brain[is_recount_years, ], las = 2,
    ylab = 'Age (years)')

## Number of samples per disease among the ones shown in the boxplot
table(recount_brain$disease[is_recount_years], useNA = 'ifany')
##
## Alzheimer’s disease
## 16
## Autism spectrum disorder
## 12
## Bipolar disorder
## 14
## Brain tumor
## 47
## Embryonal tumors with multilayered rosettes
## 11
## Epilepsy
## 16
## Huntington's disease
## 32
## Parkinson’s disease
## 29
## Primitive neuroectodermal tumor
## 3
## Rett syndrome
## 3
## Schizophrenia
## 19
## Tumor
## 282
## <NA>
## 2657
This document was made possible thanks to:
Code for creating this document
## Create the vignette
library('rmarkdown')
system.time(render('merging_data.Rmd', 'BiocStyle::html_document'))
Reproducibility information for this document.
## Reproducibility info
proc.time()
## user system elapsed
## 22.498 2.258 36.396
message(Sys.time())
## 2020-11-13 16:24:13
options(width = 120)
session_info()
## ─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.2 Patched (2020-06-24 r78746)
## os CentOS Linux 7 (Core)
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz US/Eastern
## date 2020-11-13
##
## ─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
## package * version date lib source
## AnnotationDbi 1.50.3 2020-07-25 [2] Bioconductor
## askpass 1.1 2019-01-13 [2] CRAN (R 4.0.0)
## assertthat 0.2.1 2019-03-21 [2] CRAN (R 4.0.0)
## backports 1.2.0 2020-11-02 [1] CRAN (R 4.0.2)
## base64enc 0.1-3 2015-07-28 [2] CRAN (R 4.0.0)
## bibtex 0.4.2.3 2020-09-19 [2] CRAN (R 4.0.2)
## Biobase * 2.48.0 2020-04-27 [2] Bioconductor
## BiocFileCache 1.12.1 2020-08-04 [2] Bioconductor
## BiocGenerics * 0.34.0 2020-04-27 [2] Bioconductor
## BiocManager 1.30.10 2019-11-16 [2] CRAN (R 4.0.0)
## BiocParallel 1.22.0 2020-04-27 [2] Bioconductor
## BiocStyle * 2.16.1 2020-09-25 [1] Bioconductor
## biomaRt 2.44.4 2020-10-13 [2] Bioconductor
## Biostrings 2.56.0 2020-04-27 [2] Bioconductor
## bit 4.0.4 2020-08-04 [2] CRAN (R 4.0.2)
## bit64 4.0.5 2020-08-30 [2] CRAN (R 4.0.2)
## bitops 1.0-6 2013-08-17 [2] CRAN (R 4.0.0)
## blob 1.2.1 2020-01-20 [2] CRAN (R 4.0.0)
## bookdown 0.21 2020-10-13 [1] CRAN (R 4.0.2)
## BSgenome 1.56.0 2020-04-27 [2] Bioconductor
## bumphunter 1.30.0 2020-04-27 [2] Bioconductor
## callr 3.5.1 2020-10-13 [2] CRAN (R 4.0.2)
## checkmate 2.0.0 2020-02-06 [2] CRAN (R 4.0.0)
## cli 2.1.0 2020-10-12 [2] CRAN (R 4.0.2)
## cluster 2.1.0 2019-06-19 [3] CRAN (R 4.0.2)
## codetools 0.2-16 2018-12-24 [3] CRAN (R 4.0.2)
## colorspace 1.4-1 2019-03-18 [2] CRAN (R 4.0.0)
## crayon 1.3.4 2017-09-16 [2] CRAN (R 4.0.0)
## curl 4.3 2019-12-02 [2] CRAN (R 4.0.0)
## data.table 1.13.2 2020-10-19 [2] CRAN (R 4.0.2)
## DBI 1.1.0 2019-12-15 [2] CRAN (R 4.0.0)
## dbplyr 2.0.0 2020-11-03 [1] CRAN (R 4.0.2)
## DelayedArray * 0.14.1 2020-07-14 [2] Bioconductor
## derfinder 1.22.0 2020-04-27 [2] Bioconductor
## derfinderHelper 1.22.0 2020-04-27 [2] Bioconductor
## desc 1.2.0 2018-05-01 [2] CRAN (R 4.0.0)
## devtools * 2.3.2 2020-09-18 [2] CRAN (R 4.0.2)
## digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.2)
## doRNG 1.8.2 2020-01-27 [2] CRAN (R 4.0.0)
## downloader 0.4 2015-07-09 [2] CRAN (R 4.0.0)
## dplyr 1.0.2 2020-08-18 [2] CRAN (R 4.0.2)
## ellipsis 0.3.1 2020-05-15 [2] CRAN (R 4.0.0)
## evaluate 0.14 2019-05-28 [2] CRAN (R 4.0.0)
## fansi 0.4.1 2020-01-08 [2] CRAN (R 4.0.0)
## foreach 1.5.1 2020-10-15 [2] CRAN (R 4.0.2)
## foreign 0.8-80 2020-05-24 [3] CRAN (R 4.0.2)
## Formula 1.2-4 2020-10-16 [2] CRAN (R 4.0.2)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
## generics 0.1.0 2020-10-31 [1] CRAN (R 4.0.2)
## GenomeInfoDb * 1.24.2 2020-06-15 [2] Bioconductor
## GenomeInfoDbData 1.2.3 2020-05-18 [2] Bioconductor
## GenomicAlignments 1.24.0 2020-04-27 [2] Bioconductor
## GenomicFeatures 1.40.1 2020-07-08 [2] Bioconductor
## GenomicFiles 1.24.0 2020-04-27 [2] Bioconductor
## GenomicRanges * 1.40.0 2020-04-27 [2] Bioconductor
## GEOquery 2.56.0 2020-04-27 [2] Bioconductor
## ggplot2 3.3.2 2020-06-19 [2] CRAN (R 4.0.2)
## glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.2)
## gridExtra 2.3 2017-09-09 [2] CRAN (R 4.0.0)
## gtable 0.3.0 2019-03-25 [2] CRAN (R 4.0.0)
## Hmisc 4.4-1 2020-08-10 [2] CRAN (R 4.0.2)
## hms 0.5.3 2020-01-08 [2] CRAN (R 4.0.0)
## htmlTable 2.1.0 2020-09-16 [2] CRAN (R 4.0.2)
## htmltools 0.5.0 2020-06-16 [2] CRAN (R 4.0.2)
## htmlwidgets 1.5.2 2020-10-03 [2] CRAN (R 4.0.2)
## httr 1.4.2 2020-07-20 [2] CRAN (R 4.0.2)
## IRanges * 2.22.2 2020-05-21 [2] Bioconductor
## iterators 1.0.13 2020-10-15 [2] CRAN (R 4.0.2)
## jpeg 0.1-8.1 2019-10-24 [2] CRAN (R 4.0.0)
## jsonlite 1.7.1 2020-09-07 [2] CRAN (R 4.0.2)
## knitcitations * 1.0.10 2019-09-15 [1] CRAN (R 4.0.2)
## knitr 1.30 2020-09-22 [1] CRAN (R 4.0.2)
## lattice 0.20-41 2020-04-02 [3] CRAN (R 4.0.2)
## latticeExtra 0.6-29 2019-12-19 [2] CRAN (R 4.0.0)
## lifecycle 0.2.0 2020-03-06 [2] CRAN (R 4.0.0)
## limma 3.44.3 2020-06-12 [2] Bioconductor
## locfit 1.5-9.4 2020-03-25 [2] CRAN (R 4.0.0)
## lubridate 1.7.9 2020-06-08 [1] CRAN (R 4.0.0)
## magick 2.5.2 2020-11-10 [1] CRAN (R 4.0.2)
## magrittr 1.5 2014-11-22 [2] CRAN (R 4.0.0)
## Matrix 1.2-18 2019-11-27 [3] CRAN (R 4.0.2)
## matrixStats * 0.57.0 2020-09-25 [2] CRAN (R 4.0.2)
## memoise 1.1.0 2017-04-21 [2] CRAN (R 4.0.0)
## munsell 0.5.0 2018-06-12 [2] CRAN (R 4.0.0)
## nnet 7.3-14 2020-04-26 [3] CRAN (R 4.0.2)
## openssl 1.4.3 2020-09-18 [2] CRAN (R 4.0.2)
## pillar 1.4.6 2020-07-10 [2] CRAN (R 4.0.2)
## pkgbuild 1.1.0 2020-07-13 [2] CRAN (R 4.0.2)
## pkgconfig 2.0.3 2019-09-22 [2] CRAN (R 4.0.0)
## pkgload 1.1.0 2020-05-29 [2] CRAN (R 4.0.2)
## plyr 1.8.6 2020-03-03 [2] CRAN (R 4.0.0)
## png 0.1-7 2013-12-03 [2] CRAN (R 4.0.0)
## prettyunits 1.1.1 2020-01-24 [2] CRAN (R 4.0.0)
## processx 3.4.4 2020-09-03 [2] CRAN (R 4.0.2)
## progress 1.2.2 2019-05-16 [2] CRAN (R 4.0.0)
## ps 1.4.0 2020-10-07 [2] CRAN (R 4.0.2)
## purrr 0.3.4 2020-04-17 [2] CRAN (R 4.0.0)
## qvalue 2.20.0 2020-04-27 [2] Bioconductor
## R6 2.5.0 2020-10-28 [1] CRAN (R 4.0.2)
## rappdirs 0.3.1 2016-03-28 [2] CRAN (R 4.0.0)
## RColorBrewer 1.1-2 2014-12-07 [2] CRAN (R 4.0.0)
## Rcpp 1.0.5 2020-07-06 [2] CRAN (R 4.0.2)
## RCurl 1.98-1.2 2020-04-18 [2] CRAN (R 4.0.0)
## readr 1.4.0 2020-10-05 [2] CRAN (R 4.0.2)
## recount * 1.14.0 2020-04-27 [2] Bioconductor
## RefManageR 1.2.12 2019-04-03 [1] CRAN (R 4.0.2)
## remotes 2.2.0 2020-07-21 [2] CRAN (R 4.0.2)
## rentrez 1.2.2 2019-05-02 [2] CRAN (R 4.0.0)
## reshape2 1.4.4 2020-04-09 [2] CRAN (R 4.0.0)
## rlang 0.4.8 2020-10-08 [1] CRAN (R 4.0.2)
## rmarkdown * 2.5 2020-10-21 [1] CRAN (R 4.0.2)
## rngtools 1.5 2020-01-23 [2] CRAN (R 4.0.0)
## rpart 4.1-15 2019-04-12 [3] CRAN (R 4.0.2)
## rprojroot 1.3-2 2018-01-03 [2] CRAN (R 4.0.0)
## Rsamtools 2.4.0 2020-04-27 [2] Bioconductor
## RSQLite 2.2.1 2020-09-30 [2] CRAN (R 4.0.2)
## rstudioapi 0.11 2020-02-07 [2] CRAN (R 4.0.0)
## rtracklayer 1.48.0 2020-04-27 [2] Bioconductor
## S4Vectors * 0.26.1 2020-05-16 [2] Bioconductor
## scales 1.1.1 2020-05-11 [2] CRAN (R 4.0.0)
## sessioninfo 1.1.1 2018-11-05 [2] CRAN (R 4.0.0)
## stringi 1.5.3 2020-09-09 [2] CRAN (R 4.0.2)
## stringr 1.4.0 2019-02-10 [2] CRAN (R 4.0.0)
## SummarizedExperiment * 1.18.2 2020-07-09 [2] Bioconductor
## survival 3.2-3 2020-06-13 [3] CRAN (R 4.0.2)
## testthat 3.0.0 2020-10-31 [1] CRAN (R 4.0.2)
## tibble 3.0.4 2020-10-12 [2] CRAN (R 4.0.2)
## tidyr 1.1.2 2020-08-27 [2] CRAN (R 4.0.2)
## tidyselect 1.1.0 2020-05-11 [2] CRAN (R 4.0.0)
## usethis * 1.6.3 2020-09-17 [2] CRAN (R 4.0.2)
## VariantAnnotation 1.34.0 2020-04-27 [2] Bioconductor
## vctrs 0.3.4 2020-08-29 [1] CRAN (R 4.0.2)
## withr 2.3.0 2020-09-22 [2] CRAN (R 4.0.2)
## xfun 0.19 2020-10-30 [1] CRAN (R 4.0.2)
## XML 3.99-0.5 2020-07-23 [2] CRAN (R 4.0.2)
## xml2 1.3.2 2020-04-23 [2] CRAN (R 4.0.0)
## XVector 0.28.0 2020-04-27 [2] Bioconductor
## yaml 2.2.1 2020-02-01 [2] CRAN (R 4.0.0)
## zlibbioc 1.34.0 2020-04-27 [2] Bioconductor
##
## [1] /users/neagles/R/4.0
## [2] /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-4.0/R/4.0/lib64/R/site-library
## [3] /jhpce/shared/jhpce/core/conda/miniconda3-4.6.14/envs/svnR-4.0/R/4.0/lib64/R/library
This document was generated using BiocStyle (Oleś, Morgan, and Huber, 2020) with knitr (Xie, 2014) and rmarkdown (Allaire, Xie, McPherson, Luraschi, et al., 2020) running behind the scenes.
Citations made with knitcitations (Boettiger, 2019) and the bibliographical file is available here.
[1] J. Allaire, Y. Xie, J. McPherson, J. Luraschi, et al. rmarkdown: Dynamic Documents for R. R package version 2.5. 2020. <URL: https://github.com/rstudio/rmarkdown>.
[2] C. Boettiger. knitcitations: Citations for ‘Knitr’ Markdown Files. R package version 1.0.10. 2019. <URL: https://CRAN.R-project.org/package=knitcitations>.
[3] L. Collado-Torres, A. Nellore, K. Kammers, S. E. Ellis, et al. “Reproducible RNA-seq analysis using recount2”. In: Nature Biotechnology (2017). DOI: 10.1038/nbt.3838. <URL: http://www.nature.com/nbt/journal/v35/n4/full/nbt.3838.html>.
[4] A. Oleś, M. Morgan, and W. Huber. BiocStyle: Standard styles for vignettes and other Bioconductor documents. R package version 2.16.1. 2020. <URL: https://github.com/Bioconductor/BiocStyle>.
[5] R Core Team. R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing. Vienna, Austria, 2020. <URL: https://www.R-project.org/>.
[6] H. Wickham, J. Hester, and W. Chang. devtools: Tools to Make Developing R Packages Easier. R package version 2.3.2. 2020. <URL: https://CRAN.R-project.org/package=devtools>.
[7] Y. Xie. “knitr: A Comprehensive Tool for Reproducible Research in R”. In: Implementing Reproducible Computational Research. Ed. by V. Stodden, F. Leisch and R. D. Peng. ISBN 978-1466561595. Chapman and Hall/CRC, 2014. <URL: http://www.crcpress.com/product/isbn/9781466561595>.