R/create_rse.R
create_rse.Rd
Once you have identified a project you want to work with, you can use this
function to construct a recount3 RangedSummarizedExperiment-class (RSE)
object at the gene, exon, or exon-exon junction expression feature level.
This function will retrieve the data, cache it, and then assemble the RSE
object.
create_rse(
project_info,
type = c("gene", "exon", "jxn"),
annotation = annotation_options(project_info$organism),
bfc = recount3_cache(),
jxn_format = c("ALL", "UNIQUE"),
recount3_url = getOption("recount3_url", "http://duffel.rail.bio/recount3"),
verbose = getOption("recount3_verbose", TRUE)
)
project_info: A data.frame() with one row that contains the information for
the project you are interested in. You can find which project to work on
using available_projects().
type: A character(1) specifying whether you want to access gene, exon, or
exon-exon junction counts.
annotation: A character(1) specifying which annotation you want to download.
Only used when type is either "gene" or "exon".
bfc: A BiocFileCache-class object where the files will be cached to,
typically created by recount3_cache().
jxn_format: A character(1) specifying whether the exon-exon junction files
are derived from all the reads ("ALL") or only the uniquely mapping read
counts ("UNIQUE"). Note that "UNIQUE" is only available for some projects:
GTEx and TCGA for human.
recount3_url: A character(1) specifying the home URL for recount3 or a local
directory where you have mirrored recount3. Defaults to the load balancer
http://duffel.rail.bio/recount3, but can also be
https://recount-opendata.s3.amazonaws.com/recount3/release from
https://registry.opendata.aws/recount/ or SciServer datascope from
IDIES at JHU https://sciserver.org/public-data/recount3/data. You can set the
R option recount3_url (for example in your .Rprofile) if you have a favorite
mirror.
verbose: A logical(1) indicating whether to show messages with updates.
## Find all available human projects
human_projects <- available_projects()
#> 2023-05-07 00:10:43.158444 caching file sra.recount_project.MD.gz.
#> 2023-05-07 00:10:43.471854 caching file gtex.recount_project.MD.gz.
#> 2023-05-07 00:10:43.842242 caching file tcga.recount_project.MD.gz.
## Find the project you are interested in
proj_info <- subset(
human_projects,
project == "SRP009615" & project_type == "data_sources"
)
## Create an RSE object at the gene level
rse_gene_SRP009615 <- create_rse(proj_info)
#> 2023-05-07 00:10:47.629258 downloading and reading the metadata.
#> 2023-05-07 00:10:47.93515 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:10:48.253151 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:10:48.573828 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:10:48.891872 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:10:49.203151 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:10:49.285469 downloading and reading the feature information.
#> 2023-05-07 00:10:49.557602 caching file human.gene_sums.G026.gtf.gz.
#> 2023-05-07 00:10:50.043949 downloading and reading the counts: 12 samples across 63856 features.
#> 2023-05-07 00:10:50.311859 caching file sra.gene_sums.SRP009615.G026.gz.
#> 2023-05-07 00:10:50.493249 constructing the RangedSummarizedExperiment (rse) object.
## Explore the resulting RSE gene object
rse_gene_SRP009615
#> class: RangedSummarizedExperiment
#> dim: 63856 12
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(63856): ENSG00000278704.1 ENSG00000277400.1 ...
#> ENSG00000182484.15_PAR_Y ENSG00000227159.8_PAR_Y
#> rowData names(10): source type ... havana_gene tag
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#> recount_pred.curated.cell_line BigWigURL
## Information about how this RSE object was made
metadata(rse_gene_SRP009615)
#> $time_created
#> [1] "2023-05-07 00:10:50 UTC"
#>
#> $recount3_version
#> package ondiskversion loadedversion path
#> recount3 recount3 1.11.2 1.11.2 /__w/_temp/Library/recount3
#> loadedpath attached is_base date source
#> recount3 /__w/_temp/Library/recount3 TRUE FALSE 2023-05-07 Bioconductor
#> md5ok library
#> recount3 NA /__w/_temp/Library
#>
#> $project
#> [1] "SRP009615"
#>
#> $project_home
#> [1] "data_sources/sra"
#>
#> $type
#> [1] "gene"
#>
#> $organism
#> [1] "human"
#>
#> $annotation
#> [1] "gencode_v26"
#>
#> $recount3_url
#> [1] "http://duffel.rail.bio/recount3"
#>
## Number of genes by number of samples
dim(rse_gene_SRP009615)
#> [1] 63856 12
## Information about the genes
rowRanges(rse_gene_SRP009615)
#> GRanges object with 63856 ranges and 10 metadata columns:
#> seqnames ranges strand | source
#> <Rle> <IRanges> <Rle> | <factor>
#> ENSG00000278704.1 GL000009.2 56140-58376 - | ENSEMBL
#> ENSG00000277400.1 GL000194.1 53590-115018 - | ENSEMBL
#> ENSG00000274847.1 GL000194.1 53594-115055 - | ENSEMBL
#> ENSG00000277428.1 GL000195.1 37434-37534 - | ENSEMBL
#> ENSG00000276256.1 GL000195.1 42939-49164 - | ENSEMBL
#> ... ... ... ... . ...
#> ENSG00000124334.17_PAR_Y chrY 57184101-57197337 + | HAVANA
#> ENSG00000185203.12_PAR_Y chrY 57201143-57203357 - | HAVANA
#> ENSG00000270726.6_PAR_Y chrY 57190738-57208756 + | HAVANA
#> ENSG00000182484.15_PAR_Y chrY 57207346-57212230 + | HAVANA
#> ENSG00000227159.8_PAR_Y chrY 57212184-57214397 - | HAVANA
#> type bp_length phase gene_id
#> <factor> <numeric> <integer> <character>
#> ENSG00000278704.1 gene 2237 <NA> ENSG00000278704.1
#> ENSG00000277400.1 gene 2179 <NA> ENSG00000277400.1
#> ENSG00000274847.1 gene 1599 <NA> ENSG00000274847.1
#> ENSG00000277428.1 gene 101 <NA> ENSG00000277428.1
#> ENSG00000276256.1 gene 2195 <NA> ENSG00000276256.1
#> ... ... ... ... ...
#> ENSG00000124334.17_PAR_Y gene 2504 <NA> ENSG00000124334.17_P..
#> ENSG00000185203.12_PAR_Y gene 1054 <NA> ENSG00000185203.12_P..
#> ENSG00000270726.6_PAR_Y gene 773 <NA> ENSG00000270726.6_PA..
#> ENSG00000182484.15_PAR_Y gene 4618 <NA> ENSG00000182484.15_P..
#> ENSG00000227159.8_PAR_Y gene 1306 <NA> ENSG00000227159.8_PA..
#> gene_type gene_name level
#> <character> <character> <character>
#> ENSG00000278704.1 protein_coding BX004987.1 3
#> ENSG00000277400.1 protein_coding AC145212.2 3
#> ENSG00000274847.1 protein_coding AC145212.1 3
#> ENSG00000277428.1 misc_RNA Y_RNA 3
#> ENSG00000276256.1 protein_coding AC011043.1 3
#> ... ... ... ...
#> ENSG00000124334.17_PAR_Y protein_coding IL9R 2
#> ENSG00000185203.12_PAR_Y antisense WASIR1 2
#> ENSG00000270726.6_PAR_Y processed_transcript AJ271736.10 2
#> ENSG00000182484.15_PAR_Y transcribed_unproces.. WASH6P 2
#> ENSG00000227159.8_PAR_Y unprocessed_pseudogene DDX11L16 2
#> havana_gene tag
#> <character> <character>
#> ENSG00000278704.1 <NA> <NA>
#> ENSG00000277400.1 <NA> <NA>
#> ENSG00000274847.1 <NA> <NA>
#> ENSG00000277428.1 <NA> <NA>
#> ENSG00000276256.1 <NA> <NA>
#> ... ... ...
#> ENSG00000124334.17_PAR_Y OTTHUMG00000022720.1 PAR
#> ENSG00000185203.12_PAR_Y OTTHUMG00000022676.3 PAR
#> ENSG00000270726.6_PAR_Y OTTHUMG00000184987.2 PAR
#> ENSG00000182484.15_PAR_Y OTTHUMG00000022677.5 PAR
#> ENSG00000227159.8_PAR_Y OTTHUMG00000022678.1 PAR
#> -------
#> seqinfo: 374 sequences from an unspecified genome; no seqlengths
## Sample metadata
colnames(colData(rse_gene_SRP009615))
#> [1] "rail_id"
#> [2] "external_id"
#> [3] "study"
#> [4] "sra.sample_acc.x"
#> [5] "sra.experiment_acc"
#> [6] "sra.submission_acc"
#> [7] "sra.submission_center"
#> [8] "sra.submission_lab"
#> [9] "sra.study_title"
#> [10] "sra.study_abstract"
#> [11] "sra.study_description"
#> [12] "sra.experiment_title"
#> [13] "sra.design_description"
#> [14] "sra.sample_description"
#> [15] "sra.library_name"
#> [16] "sra.library_strategy"
#> [17] "sra.library_source"
#> [18] "sra.library_selection"
#> [19] "sra.library_layout"
#> [20] "sra.paired_nominal_length"
#> [21] "sra.paired_nominal_stdev"
#> [22] "sra.library_construction_protocol"
#> [23] "sra.platform_model"
#> [24] "sra.sample_attributes"
#> [25] "sra.experiment_attributes"
#> [26] "sra.spot_length"
#> [27] "sra.sample_name"
#> [28] "sra.sample_title"
#> [29] "sra.sample_bases"
#> [30] "sra.sample_spots"
#> [31] "sra.run_published"
#> [32] "sra.size"
#> [33] "sra.run_total_bases"
#> [34] "sra.run_total_spots"
#> [35] "sra.num_reads"
#> [36] "sra.num_spots"
#> [37] "sra.read_info"
#> [38] "sra.run_alias"
#> [39] "sra.run_center_name"
#> [40] "sra.run_broker_name"
#> [41] "sra.run_center"
#> [42] "recount_project.project"
#> [43] "recount_project.organism"
#> [44] "recount_project.file_source"
#> [45] "recount_project.metadata_source"
#> [46] "recount_project.date_processed"
#> [47] "recount_qc.aligned_reads%.chrm"
#> [48] "recount_qc.aligned_reads%.chrx"
#> [49] "recount_qc.aligned_reads%.chry"
#> [50] "recount_qc.bc_auc.all_reads_all_bases"
#> [51] "recount_qc.bc_auc.all_reads_annotated_bases"
#> [52] "recount_qc.bc_auc.unique_reads_all_bases"
#> [53] "recount_qc.bc_auc.unique_reads_annotated_bases"
#> [54] "recount_qc.bc_auc.all_%"
#> [55] "recount_qc.bc_auc.unique_%"
#> [56] "recount_qc.bc_frag.count"
#> [57] "recount_qc.bc_frag.kallisto_count"
#> [58] "recount_qc.bc_frag.kallisto_mean_length"
#> [59] "recount_qc.bc_frag.mean_length"
#> [60] "recount_qc.bc_frag.mode_length"
#> [61] "recount_qc.bc_frag.mode_length_count"
#> [62] "recount_qc.exon_fc.all_%"
#> [63] "recount_qc.exon_fc.unique_%"
#> [64] "recount_qc.exon_fc_count_all.total"
#> [65] "recount_qc.exon_fc_count_all.assigned"
#> [66] "recount_qc.exon_fc_count_unique.total"
#> [67] "recount_qc.exon_fc_count_unique.assigned"
#> [68] "recount_qc.gene_fc.all_%"
#> [69] "recount_qc.gene_fc.unique_%"
#> [70] "recount_qc.gene_fc_count_all.total"
#> [71] "recount_qc.gene_fc_count_all.assigned"
#> [72] "recount_qc.gene_fc_count_unique.total"
#> [73] "recount_qc.gene_fc_count_unique.assigned"
#> [74] "recount_qc.intron_sum"
#> [75] "recount_qc.intron_sum_%"
#> [76] "recount_qc.star.%_of_chimeric_reads"
#> [77] "recount_qc.star.%_of_chimeric_reads2"
#> [78] "recount_qc.star.%_of_reads_mapped_to_multiple_loci"
#> [79] "recount_qc.star.%_of_reads_mapped_to_multiple_loci2"
#> [80] "recount_qc.star.%_of_reads_mapped_to_too_many_loci"
#> [81] "recount_qc.star.%_of_reads_mapped_to_too_many_loci2"
#> [82] "recount_qc.star.%_of_reads_unmapped:_other"
#> [83] "recount_qc.star.%_of_reads_unmapped:_other2"
#> [84] "recount_qc.star.%_of_reads_unmapped:_too_many_mismatches"
#> [85] "recount_qc.star.%_of_reads_unmapped:_too_many_mismatches2"
#> [86] "recount_qc.star.%_of_reads_unmapped:_too_short"
#> [87] "recount_qc.star.%_of_reads_unmapped:_too_short2"
#> [88] "recount_qc.star.all_mapped_reads"
#> [89] "recount_qc.star.all_mapped_reads2"
#> [90] "recount_qc.star.average_input_read_length"
#> [91] "recount_qc.star.average_input_read_length2"
#> [92] "recount_qc.star.average_mapped_length"
#> [93] "recount_qc.star.average_mapped_length2"
#> [94] "recount_qc.star.deletion_average_length"
#> [95] "recount_qc.star.deletion_average_length2"
#> [96] "recount_qc.star.deletion_rate_per_base"
#> [97] "recount_qc.star.deletion_rate_per_base2"
#> [98] "recount_qc.star.insertion_average_length"
#> [99] "recount_qc.star.insertion_average_length2"
#> [100] "recount_qc.star.insertion_rate_per_base"
#> [101] "recount_qc.star.insertion_rate_per_base2"
#> [102] "recount_qc.star.mapping_speed,_million_of_reads_per_hour"
#> [103] "recount_qc.star.mapping_speed,_million_of_reads_per_hour2"
#> [104] "recount_qc.star.mismatch_rate_per_base,_%"
#> [105] "recount_qc.star.mismatch_rate_per_base,_%2"
#> [106] "recount_qc.star.number_of_chimeric_reads"
#> [107] "recount_qc.star.number_of_chimeric_reads2"
#> [108] "recount_qc.star.number_of_input_reads"
#> [109] "recount_qc.star.number_of_input_reads2"
#> [110] "recount_qc.star.number_of_reads_mapped_to_multiple_loci"
#> [111] "recount_qc.star.number_of_reads_mapped_to_multiple_loci2"
#> [112] "recount_qc.star.number_of_reads_mapped_to_too_many_loci"
#> [113] "recount_qc.star.number_of_reads_mapped_to_too_many_loci2"
#> [114] "recount_qc.star.number_of_reads_unmapped:_other"
#> [115] "recount_qc.star.number_of_reads_unmapped:_other2"
#> [116] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches"
#> [117] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches2"
#> [118] "recount_qc.star.number_of_reads_unmapped:_too_short"
#> [119] "recount_qc.star.number_of_reads_unmapped:_too_short2"
#> [120] "recount_qc.star.number_of_splices:_at/ac"
#> [121] "recount_qc.star.number_of_splices:_at/ac2"
#> [122] "recount_qc.star.number_of_splices:_annotated_(sjdb)"
#> [123] "recount_qc.star.number_of_splices:_annotated_(sjdb)2"
#> [124] "recount_qc.star.number_of_splices:_gc/ag"
#> [125] "recount_qc.star.number_of_splices:_gc/ag2"
#> [126] "recount_qc.star.number_of_splices:_gt/ag"
#> [127] "recount_qc.star.number_of_splices:_gt/ag2"
#> [128] "recount_qc.star.number_of_splices:_non-canonical"
#> [129] "recount_qc.star.number_of_splices:_non-canonical2"
#> [130] "recount_qc.star.number_of_splices:_total"
#> [131] "recount_qc.star.number_of_splices:_total2"
#> [132] "recount_qc.star.uniquely_mapped_reads_%"
#> [133] "recount_qc.star.uniquely_mapped_reads_%2"
#> [134] "recount_qc.star.uniquely_mapped_reads_number"
#> [135] "recount_qc.star.uniquely_mapped_reads_number2"
#> [136] "recount_qc.junction_count"
#> [137] "recount_qc.junction_coverage"
#> [138] "recount_qc.junction_avg_coverage"
#> [139] "recount_qc.star.number_of_input_reads_both"
#> [140] "recount_qc.star.all_mapped_reads_both"
#> [141] "recount_qc.star.number_of_chimeric_reads_both"
#> [142] "recount_qc.star.number_of_reads_mapped_to_multiple_loci_both"
#> [143] "recount_qc.star.number_of_reads_mapped_to_too_many_loci_both"
#> [144] "recount_qc.star.number_of_reads_unmapped:_other_both"
#> [145] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches_both"
#> [146] "recount_qc.star.number_of_reads_unmapped:_too_short_both"
#> [147] "recount_qc.star.uniquely_mapped_reads_number_both"
#> [148] "recount_qc.star.%_mapped_reads_both"
#> [149] "recount_qc.star.%_chimeric_reads_both"
#> [150] "recount_qc.star.%_reads_mapped_to_multiple_loci_both"
#> [151] "recount_qc.star.%_reads_mapped_to_too_many_loci_both"
#> [152] "recount_qc.star.%_reads_unmapped:_other_both"
#> [153] "recount_qc.star.%_reads_unmapped:_too_many_mismatches_both"
#> [154] "recount_qc.star.%_reads_unmapped:_too_short_both"
#> [155] "recount_qc.star.uniquely_mapped_reads_%_both"
#> [156] "recount_seq_qc.min_len"
#> [157] "recount_seq_qc.max_len"
#> [158] "recount_seq_qc.avg_len"
#> [159] "recount_seq_qc.#distinct_quality_values"
#> [160] "recount_seq_qc.#bases"
#> [161] "recount_seq_qc.%a"
#> [162] "recount_seq_qc.%c"
#> [163] "recount_seq_qc.%g"
#> [164] "recount_seq_qc.%t"
#> [165] "recount_seq_qc.%n"
#> [166] "recount_seq_qc.avgq"
#> [167] "recount_seq_qc.errq"
#> [168] "recount_pred.sample_acc.y"
#> [169] "recount_pred.curated.type"
#> [170] "recount_pred.curated.tissue"
#> [171] "recount_pred.pattern.predict.type"
#> [172] "recount_pred.pred.type"
#> [173] "recount_pred.curated.cell_type"
#> [174] "recount_pred.curated.cell_line"
#> [175] "BigWigURL"
## Check how much memory this RSE object uses
pryr::object_size(rse_gene_SRP009615)
#> 24.81 MB
## Create an RSE object using gencode_v29 instead of gencode_v26
rse_gene_SRP009615_gencode_v29 <- create_rse(
proj_info,
annotation = "gencode_v29",
verbose = FALSE
)
rowRanges(rse_gene_SRP009615_gencode_v29)
#> GRanges object with 64837 ranges and 10 metadata columns:
#> seqnames ranges strand | source
#> <Rle> <IRanges> <Rle> | <factor>
#> ENSG00000278704.1 GL000009.2 56140-58376 - | ENSEMBL
#> ENSG00000277400.1 GL000194.1 53590-115018 - | ENSEMBL
#> ENSG00000274847.1 GL000194.1 53594-115055 - | ENSEMBL
#> ENSG00000277428.1 GL000195.1 37434-37534 - | ENSEMBL
#> ENSG00000276256.1 GL000195.1 42939-49164 - | ENSEMBL
#> ... ... ... ... . ...
#> ENSG00000124334.17_PAR_Y chrY 57184101-57197337 + | HAVANA
#> ENSG00000185203.12_PAR_Y chrY 57201143-57203357 - | HAVANA
#> ENSG00000270726.6_PAR_Y chrY 57190738-57208756 + | HAVANA
#> ENSG00000182484.15_PAR_Y chrY 57207346-57212230 + | HAVANA
#> ENSG00000227159.8_PAR_Y chrY 57212184-57214397 - | HAVANA
#> type bp_length phase gene_id
#> <factor> <numeric> <integer> <character>
#> ENSG00000278704.1 gene 2237 <NA> ENSG00000278704.1
#> ENSG00000277400.1 gene 2179 <NA> ENSG00000277400.1
#> ENSG00000274847.1 gene 1599 <NA> ENSG00000274847.1
#> ENSG00000277428.1 gene 101 <NA> ENSG00000277428.1
#> ENSG00000276256.1 gene 2195 <NA> ENSG00000276256.1
#> ... ... ... ... ...
#> ENSG00000124334.17_PAR_Y gene 2504 <NA> ENSG00000124334.17_P..
#> ENSG00000185203.12_PAR_Y gene 1054 <NA> ENSG00000185203.12_P..
#> ENSG00000270726.6_PAR_Y gene 773 <NA> ENSG00000270726.6_PA..
#> ENSG00000182484.15_PAR_Y gene 4618 <NA> ENSG00000182484.15_P..
#> ENSG00000227159.8_PAR_Y gene 1306 <NA> ENSG00000227159.8_PA..
#> gene_type gene_name level
#> <character> <character> <character>
#> ENSG00000278704.1 protein_coding BX004987.1 3
#> ENSG00000277400.1 protein_coding AC145212.1 3
#> ENSG00000274847.1 protein_coding MAFIP 3
#> ENSG00000277428.1 misc_RNA RF00019 3
#> ENSG00000276256.1 protein_coding AC011043.1 3
#> ... ... ... ...
#> ENSG00000124334.17_PAR_Y protein_coding IL9R 2
#> ENSG00000185203.12_PAR_Y antisense WASIR1 2
#> ENSG00000270726.6_PAR_Y processed_transcript AJ271736.1 2
#> ENSG00000182484.15_PAR_Y transcribed_unproces.. WASH6P 2
#> ENSG00000227159.8_PAR_Y unprocessed_pseudogene DDX11L16 2
#> havana_gene tag
#> <character> <character>
#> ENSG00000278704.1 <NA> <NA>
#> ENSG00000277400.1 <NA> <NA>
#> ENSG00000274847.1 <NA> <NA>
#> ENSG00000277428.1 <NA> <NA>
#> ENSG00000276256.1 <NA> <NA>
#> ... ... ...
#> ENSG00000124334.17_PAR_Y OTTHUMG00000022720.1 PAR
#> ENSG00000185203.12_PAR_Y OTTHUMG00000022676.3 PAR
#> ENSG00000270726.6_PAR_Y OTTHUMG00000184987.2 PAR
#> ENSG00000182484.15_PAR_Y OTTHUMG00000022677.5 PAR
#> ENSG00000227159.8_PAR_Y OTTHUMG00000022678.1 PAR
#> -------
#> seqinfo: 406 sequences from an unspecified genome; no seqlengths
## Create an RSE object using FANTOM6_CAT instead of gencode_v26
rse_gene_SRP009615_fantom6_cat <- create_rse(
proj_info,
annotation = "fantom6_cat"
)
#> 2023-05-07 00:10:53.784254 downloading and reading the metadata.
#> 2023-05-07 00:10:54.069448 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:10:54.378223 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:10:54.715142 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:10:55.034494 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:10:55.34915 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:10:55.432427 downloading and reading the feature information.
#> 2023-05-07 00:10:55.710198 caching file human.gene_sums.F006.gtf.gz.
#> 2023-05-07 00:10:56.156776 downloading and reading the counts: 12 samples across 124047 features.
#> 2023-05-07 00:10:56.42205 caching file sra.gene_sums.SRP009615.F006.gz.
#> 2023-05-07 00:10:56.708977 constructing the RangedSummarizedExperiment (rse) object.
rowRanges(rse_gene_SRP009615_fantom6_cat)
#> GRanges object with 124047 ranges and 5 metadata columns:
#> seqnames ranges strand | source type
#> <Rle> <IRanges> <Rle> | <factor> <factor>
#> CATG00000042730 chr1 159537-162485 - | FANTOM6 gene
#> CATG00000042731 chr1 273882-274416 - | FANTOM6 gene
#> ENSG00000223659 chr1 627377-629095 - | FANTOM6 gene
#> ENSG00000225630 chr1 630001-630683 + | FANTOM6 gene
#> ENSG00000225972 chr1 629209-631743 + | FANTOM6 gene
#> ... ... ... ... . ... ...
#> CATG00000114975 chrY 56836712-56851323 + | FANTOM6 gene
#> CATG00000115126 chrY 56855793-56856102 - | FANTOM6 gene
#> CATG00000114976 chrY 56855491-56858320 + | FANTOM6 gene
#> CATG00000114977 chrY 56867675-56882339 + | FANTOM6 gene
#> CATG00000115127 chrY 56884759-56885317 - | FANTOM6 gene
#> bp_length phase gene_id
#> <numeric> <integer> <character>
#> CATG00000042730 2949 <NA> CATG00000042730
#> CATG00000042731 535 <NA> CATG00000042731
#> ENSG00000223659 887 <NA> ENSG00000223659
#> ENSG00000225630 683 <NA> ENSG00000225630
#> ENSG00000225972 438 <NA> ENSG00000225972
#> ... ... ... ...
#> CATG00000114975 4376 <NA> CATG00000114975
#> CATG00000115126 310 <NA> CATG00000115126
#> CATG00000114976 2830 <NA> CATG00000114976
#> CATG00000114977 14665 <NA> CATG00000114977
#> CATG00000115127 559 <NA> CATG00000115127
#> -------
#> seqinfo: 25 sequences from an unspecified genome; no seqlengths
## Create an RSE object using RefSeq instead of gencode_v26
rse_gene_SRP009615_refseq <- create_rse(
proj_info,
annotation = "refseq"
)
#> 2023-05-07 00:10:56.746102 downloading and reading the metadata.
#> 2023-05-07 00:10:57.029173 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:10:57.349174 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:10:57.664051 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:10:57.98792 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:10:58.3011 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:10:58.384231 downloading and reading the feature information.
#> 2023-05-07 00:10:58.64316 caching file human.gene_sums.R109.gtf.gz.
#> 2023-05-07 00:10:59.200787 downloading and reading the counts: 12 samples across 54042 features.
#> 2023-05-07 00:10:59.469325 caching file sra.gene_sums.SRP009615.R109.gz.
#> 2023-05-07 00:10:59.701913 constructing the RangedSummarizedExperiment (rse) object.
rowRanges(rse_gene_SRP009615_refseq)
#> GRanges object with 54042 ranges and 25 metadata columns:
#> seqnames ranges strand | source type bp_length
#> <Rle> <IRanges> <Rle> | <factor> <factor> <numeric>
#> gene14440 GL000008.2 124376-125329 - | RefSeq gene 954
#> gene14441 GL000008.2 153090-153485 - | RefSeq gene 396
#> gene14442 GL000008.2 161987-178171 - | RefSeq gene 1744
#> gene38360 GL000009.2 32290-36345 + | RefSeq gene 1512
#> gene38362 GL000009.2 43342-56258 + | RefSeq gene 972
#> ... ... ... ... . ... ... ...
#> gene54245 chrY 57171874-57172771 - | RefSeq gene 898
#> gene54246 chrY 57184101-57199537 + | RefSeq gene 6193
#> gene54247 chrY 57201084-57203350 - | RefSeq gene 1231
#> gene54248 chrY 57208178-57212192 + | RefSeq gene 1570
#> gene54249 chrY 57212178-57214703 - | RefSeq gene 1643
#> phase gene_id Dbxref Name
#> <integer> <character> <character> <character>
#> gene14440 <NA> gene14440 GeneID:100419019,HGN.. SNX18P15
#> gene14441 <NA> gene14441 GeneID:100419020,HGN.. SNX18P16
#> gene14442 <NA> gene14442 GeneID:100874392,Gen.. NR_046228.1
#> gene38360 <NA> gene38360 GeneID:100533720,HGN.. ANKRD20A15P
#> gene38362 <NA> gene38362 GeneID:105379272,Gen.. XR_949087.2
#> ... ... ... ... ...
#> gene54245 <NA> gene54245 GeneID:644218,HGNC:H.. TRPC6P
#> gene54246 <NA> gene54246 <NA> <NA>
#> gene54247 <NA> gene54247 GeneID:100128260,Gen.. NR_138048.1
#> gene54248 <NA> gene54248 GeneID:653440,HGNC:H.. WASH6P
#> gene54249 <NA> gene54249 GeneID:727856,Genban.. NR_110561.1
#> description gbkey gene_biotype gene_name
#> <character> <character> <character> <character>
#> gene14440 sorting nexin 18 pse.. Gene pseudogene SNX18P15
#> gene14441 sorting nexin 18 pse.. Gene pseudogene SNX18P16
#> gene14442 <NA> misc_RNA <NA> ANKRD20A12P
#> gene38360 ankyrin repeat domai.. Gene pseudogene ANKRD20A15P
#> gene38362 <NA> ncRNA <NA> LOC105379272
#> ... ... ... ... ...
#> gene54245 transient receptor p.. Gene pseudogene TRPC6P
#> gene54246 <NA> mRNA <NA> IL9R
#> gene54247 <NA> ncRNA <NA> WASIR1
#> gene54248 WAS protein family h.. Gene pseudogene WASH6P
#> gene54249 <NA> misc_RNA <NA> DDX11L16
#> pseudo product model_evidence
#> <character> <character> <character>
#> gene14440 true <NA> <NA>
#> gene14441 true <NA> <NA>
#> gene14442 <NA> ankyrin repeat domai.. <NA>
#> gene38360 true <NA> <NA>
#> gene38362 <NA> uncharacterized LOC1.. Supporting evidence ..
#> ... ... ... ...
#> gene54245 true <NA> <NA>
#> gene54246 <NA> <NA> <NA>
#> gene54247 <NA> WASH and IL9R antise.. <NA>
#> gene54248 true <NA> <NA>
#> gene54249 <NA> DEAD/H-box helicase .. <NA>
#> exception partial Note inference geneID
#> <character> <character> <character> <character> <character>
#> gene14440 <NA> <NA> <NA> <NA> <NA>
#> gene14441 <NA> <NA> <NA> <NA> <NA>
#> gene14442 <NA> <NA> <NA> <NA> <NA>
#> gene38360 <NA> <NA> <NA> <NA> <NA>
#> gene38362 <NA> <NA> <NA> <NA> <NA>
#> ... ... ... ... ... ...
#> gene54245 <NA> <NA> <NA> <NA> <NA>
#> gene54246 <NA> <NA> <NA> <NA> <NA>
#> gene54247 <NA> <NA> <NA> <NA> <NA>
#> gene54248 <NA> <NA> <NA> <NA> <NA>
#> gene54249 <NA> <NA> <NA> <NA> <NA>
#> anticodon gene_synonym end_range start_range
#> <character> <character> <character> <character>
#> gene14440 <NA> <NA> <NA> <NA>
#> gene14441 <NA> <NA> <NA> <NA>
#> gene14442 <NA> <NA> <NA> <NA>
#> gene38360 <NA> <NA> <NA> <NA>
#> gene38362 <NA> <NA> <NA> <NA>
#> ... ... ... ... ...
#> gene54245 <NA> TRPC6L <NA> <NA>
#> gene54246 <NA> <NA> <NA> <NA>
#> gene54247 <NA> <NA> <NA> <NA>
#> gene54248 <NA> CXYorf1,FAM39A,WASH <NA> <NA>
#> gene54249 <NA> <NA> <NA> <NA>
#> standard_name codons
#> <character> <character>
#> gene14440 <NA> <NA>
#> gene14441 <NA> <NA>
#> gene14442 <NA> <NA>
#> gene38360 <NA> <NA>
#> gene38362 <NA> <NA>
#> ... ... ...
#> gene54245 <NA> <NA>
#> gene54246 <NA> <NA>
#> gene54247 <NA> <NA>
#> gene54248 <NA> <NA>
#> gene54249 <NA> <NA>
#> -------
#> seqinfo: 436 sequences from an unspecified genome; no seqlengths
## Create an RSE object using ERCC instead of gencode_v26
rse_gene_SRP009615_ercc <- create_rse(
proj_info,
annotation = "ercc"
)
#> 2023-05-07 00:10:59.736376 downloading and reading the metadata.
#> 2023-05-07 00:11:00.02302 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:11:00.345005 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:11:00.66956 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:01.031001 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:01.36341 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:11:01.447554 downloading and reading the feature information.
#> 2023-05-07 00:11:01.718114 caching file human.gene_sums.ERCC.gtf.gz.
#> 2023-05-07 00:11:01.803771 downloading and reading the counts: 12 samples across 92 features.
#> 2023-05-07 00:11:02.067252 caching file sra.gene_sums.SRP009615.ERCC.gz.
#> 2023-05-07 00:11:02.17908 constructing the RangedSummarizedExperiment (rse) object.
rowRanges(rse_gene_SRP009615_ercc)
#> GRanges object with 92 ranges and 6 metadata columns:
#> seqnames ranges strand | source type bp_length
#> <Rle> <IRanges> <Rle> | <factor> <factor> <numeric>
#> ERCC-00002 ERCC-00002 1-1061 + | ERCC gene 1061
#> ERCC-00003 ERCC-00003 1-1023 + | ERCC gene 1023
#> ERCC-00004 ERCC-00004 1-523 + | ERCC gene 523
#> ERCC-00009 ERCC-00009 1-984 + | ERCC gene 984
#> ERCC-00012 ERCC-00012 1-994 + | ERCC gene 994
#> ... ... ... ... . ... ... ...
#> ERCC-00164 ERCC-00164 1-1022 + | ERCC gene 1022
#> ERCC-00165 ERCC-00165 1-872 + | ERCC gene 872
#> ERCC-00168 ERCC-00168 1-1024 + | ERCC gene 1024
#> ERCC-00170 ERCC-00170 1-1023 + | ERCC gene 1023
#> ERCC-00171 ERCC-00171 1-505 + | ERCC gene 505
#> phase gene_id transcript_id
#> <integer> <character> <character>
#> ERCC-00002 <NA> ERCC-00002 DQ459430
#> ERCC-00003 <NA> ERCC-00003 DQ516784
#> ERCC-00004 <NA> ERCC-00004 DQ516752
#> ERCC-00009 <NA> ERCC-00009 DQ668364
#> ERCC-00012 <NA> ERCC-00012 DQ883670
#> ... ... ... ...
#> ERCC-00164 <NA> ERCC-00164 DQ516779
#> ERCC-00165 <NA> ERCC-00165 DQ668363
#> ERCC-00168 <NA> ERCC-00168 DQ516776
#> ERCC-00170 <NA> ERCC-00170 DQ516773
#> ERCC-00171 <NA> ERCC-00171 DQ854994
#> -------
#> seqinfo: 92 sequences from an unspecified genome; no seqlengths
## Create an RSE object using SIRV instead of gencode_v26
rse_gene_SRP009615_sirv <- create_rse(
proj_info,
annotation = "sirv"
)
#> 2023-05-07 00:11:02.211417 downloading and reading the metadata.
#> 2023-05-07 00:11:02.497324 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:11:02.886448 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:11:03.22362 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:03.557758 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:03.889899 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:11:03.989911 downloading and reading the feature information.
#> 2023-05-07 00:11:04.287838 caching file human.gene_sums.SIRV.gtf.gz.
#> 2023-05-07 00:11:04.387151 downloading and reading the counts: 12 samples across 7 features.
#> 2023-05-07 00:11:04.643488 caching file sra.gene_sums.SRP009615.SIRV.gz.
#> 2023-05-07 00:11:04.946357 constructing the RangedSummarizedExperiment (rse) object.
rowRanges(rse_gene_SRP009615_sirv)
#> GRanges object with 7 ranges and 5 metadata columns:
#> seqnames ranges strand | source type bp_length
#> <Rle> <IRanges> <Rle> | <factor> <factor> <numeric>
#> SIRV1 SIRV1 1001-11643 * | LexogenSIRVData gene NA
#> SIRV2 SIRV2 1001-5911 * | LexogenSIRVData gene NA
#> SIRV3 SIRV3 1001-9943 * | LexogenSIRVData gene NA
#> SIRV4 SIRV4 1001-15122 * | LexogenSIRVData gene NA
#> SIRV5 SIRV5 1001-13606 * | LexogenSIRVData gene NA
#> SIRV6 SIRV6 1001-11837 * | LexogenSIRVData gene NA
#> SIRV7 SIRV7 1001-147957 * | LexogenSIRVData gene NA
#> phase gene_id
#> <integer> <character>
#> SIRV1 0 SIRV1
#> SIRV2 0 SIRV2
#> SIRV3 0 SIRV3
#> SIRV4 0 SIRV4
#> SIRV5 0 SIRV5
#> SIRV6 0 SIRV6
#> SIRV7 0 SIRV7
#> -------
#> seqinfo: 7 sequences from an unspecified genome; no seqlengths
## Obtain a list of RSE objects for all gene annotations
rses_gene <- lapply(annotation_options(), function(x) {
create_rse(proj_info, type = "gene", annotation = x)
})
#> 2023-05-07 00:11:04.97308 downloading and reading the metadata.
#> 2023-05-07 00:11:05.255507 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:11:05.573472 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:11:05.896726 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:06.22352 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:06.543992 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:11:06.627071 downloading and reading the feature information.
#> 2023-05-07 00:11:06.926447 caching file human.gene_sums.G026.gtf.gz.
#> 2023-05-07 00:11:07.418048 downloading and reading the counts: 12 samples across 63856 features.
#> 2023-05-07 00:11:07.680464 caching file sra.gene_sums.SRP009615.G026.gz.
#> 2023-05-07 00:11:07.860845 constructing the RangedSummarizedExperiment (rse) object.
#> 2023-05-07 00:11:07.883122 downloading and reading the metadata.
#> 2023-05-07 00:11:08.16528 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:11:08.493349 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:11:08.861712 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:09.19383 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:09.52885 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:11:09.628582 downloading and reading the feature information.
#> 2023-05-07 00:11:09.9042 caching file human.gene_sums.G029.gtf.gz.
#> 2023-05-07 00:11:10.480122 downloading and reading the counts: 12 samples across 64837 features.
#> 2023-05-07 00:11:10.747085 caching file sra.gene_sums.SRP009615.G029.gz.
#> 2023-05-07 00:11:10.916615 constructing the RangedSummarizedExperiment (rse) object.
#> 2023-05-07 00:11:10.938466 downloading and reading the metadata.
#> 2023-05-07 00:11:11.207739 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:11:11.582081 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:11:11.901201 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:12.224091 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:12.547452 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:11:12.632961 downloading and reading the feature information.
#> 2023-05-07 00:11:12.899991 caching file human.gene_sums.F006.gtf.gz.
#> 2023-05-07 00:11:13.338635 downloading and reading the counts: 12 samples across 124047 features.
#> 2023-05-07 00:11:13.593051 caching file sra.gene_sums.SRP009615.F006.gz.
#> 2023-05-07 00:11:13.889259 constructing the RangedSummarizedExperiment (rse) object.
#> 2023-05-07 00:11:13.911845 downloading and reading the metadata.
#> 2023-05-07 00:11:14.172178 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:11:14.515338 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:11:14.835939 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:15.157787 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:15.477671 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:11:15.559414 downloading and reading the feature information.
#> 2023-05-07 00:11:15.816875 caching file human.gene_sums.R109.gtf.gz.
#> 2023-05-07 00:11:16.457783 downloading and reading the counts: 12 samples across 54042 features.
#> 2023-05-07 00:11:16.690823 caching file sra.gene_sums.SRP009615.R109.gz.
#> 2023-05-07 00:11:17.646302 constructing the RangedSummarizedExperiment (rse) object.
#> 2023-05-07 00:11:17.669385 downloading and reading the metadata.
#> 2023-05-07 00:11:17.956727 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:11:18.259673 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:11:18.570068 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:18.891008 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:19.211854 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:11:19.293309 downloading and reading the feature information.
#> 2023-05-07 00:11:19.545496 caching file human.gene_sums.ERCC.gtf.gz.
#> 2023-05-07 00:11:19.628367 downloading and reading the counts: 12 samples across 92 features.
#> 2023-05-07 00:11:19.884436 caching file sra.gene_sums.SRP009615.ERCC.gz.
#> 2023-05-07 00:11:19.993551 constructing the RangedSummarizedExperiment (rse) object.
#> 2023-05-07 00:11:20.01458 downloading and reading the metadata.
#> 2023-05-07 00:11:20.29704 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:11:20.619707 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:11:20.955975 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:21.296625 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:21.632452 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:11:21.729379 downloading and reading the feature information.
#> 2023-05-07 00:11:21.986019 caching file human.gene_sums.SIRV.gtf.gz.
#> 2023-05-07 00:11:22.084423 downloading and reading the counts: 12 samples across 7 features.
#> 2023-05-07 00:11:22.335658 caching file sra.gene_sums.SRP009615.SIRV.gz.
#> 2023-05-07 00:11:22.462359 constructing the RangedSummarizedExperiment (rse) object.
names(rses_gene) <- annotation_options()
rses_gene
#> $gencode_v26
#> class: RangedSummarizedExperiment
#> dim: 63856 12
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(63856): ENSG00000278704.1 ENSG00000277400.1 ...
#> ENSG00000182484.15_PAR_Y ENSG00000227159.8_PAR_Y
#> rowData names(10): source type ... havana_gene tag
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#> recount_pred.curated.cell_line BigWigURL
#>
#> $gencode_v29
#> class: RangedSummarizedExperiment
#> dim: 64837 12
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(64837): ENSG00000278704.1 ENSG00000277400.1 ...
#> ENSG00000182484.15_PAR_Y ENSG00000227159.8_PAR_Y
#> rowData names(10): source type ... havana_gene tag
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#> recount_pred.curated.cell_line BigWigURL
#>
#> $fantom6_cat
#> class: RangedSummarizedExperiment
#> dim: 124047 12
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(124047): CATG00000042730 CATG00000042731 ... CATG00000114977
#> CATG00000115127
#> rowData names(5): source type bp_length phase gene_id
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#> recount_pred.curated.cell_line BigWigURL
#>
#> $refseq
#> class: RangedSummarizedExperiment
#> dim: 54042 12
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(54042): gene14440 gene14441 ... gene54248 gene54249
#> rowData names(25): source type ... standard_name codons
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#> recount_pred.curated.cell_line BigWigURL
#>
#> $ercc
#> class: RangedSummarizedExperiment
#> dim: 92 12
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(92): ERCC-00002 ERCC-00003 ... ERCC-00170 ERCC-00171
#> rowData names(6): source type ... gene_id transcript_id
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#> recount_pred.curated.cell_line BigWigURL
#>
#> $sirv
#> class: RangedSummarizedExperiment
#> dim: 7 12
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(7): SIRV1 SIRV2 ... SIRV6 SIRV7
#> rowData names(5): source type bp_length phase gene_id
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#> recount_pred.curated.cell_line BigWigURL
#>
## Create an RSE object at the exon level
rse_exon_SRP009615 <- create_rse(
proj_info,
type = "exon"
)
#> 2023-05-07 00:11:22.566979 downloading and reading the metadata.
#> 2023-05-07 00:11:22.849387 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:11:23.19788 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:11:23.833256 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:24.190655 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:24.621315 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:11:24.734605 downloading and reading the feature information.
#> 2023-05-07 00:11:25.016062 caching file human.exon_sums.G026.gtf.gz.
#> 2023-05-07 00:11:46.831422 downloading and reading the counts: 12 samples across 1299686 features.
#> 2023-05-07 00:11:47.362236 caching file sra.exon_sums.SRP009615.G026.gz.
#> 2023-05-07 00:11:48.649303 constructing the RangedSummarizedExperiment (rse) object.
## Explore the resulting RSE exon object
rse_exon_SRP009615
#> class: RangedSummarizedExperiment
#> dim: 1299686 12
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(1299686): GL000009.2|56140|58376|- GL000194.1|53594|54832|-
#> ... chrY|57213880|57213964|- chrY|57214350|57214397|-
#> rowData names(21): source type ... ont ccdsid
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#> recount_pred.curated.cell_line BigWigURL
dim(rse_exon_SRP009615)
#> [1] 1299686 12
rowRanges(rse_exon_SRP009615)
#> GRanges object with 1299686 ranges and 21 metadata columns:
#> seqnames ranges strand | source
#> <Rle> <IRanges> <Rle> | <factor>
#> GL000009.2|56140|58376|- GL000009.2 56140-58376 - | ENSEMBL
#> GL000194.1|53594|54832|- GL000194.1 53594-54832 - | ENSEMBL
#> GL000194.1|55446|55676|- GL000194.1 55446-55676 - | ENSEMBL
#> GL000194.1|53590|55676|- GL000194.1 53590-55676 - | ENSEMBL
#> GL000194.1|112792|112850|- GL000194.1 112792-112850 - | ENSEMBL
#> ... ... ... ... . ...
#> chrY|57212184|57213125|- chrY 57212184-57213125 - | HAVANA
#> chrY|57213204|57213357|- chrY 57213204-57213357 - | HAVANA
#> chrY|57213526|57213602|- chrY 57213526-57213602 - | HAVANA
#> chrY|57213880|57213964|- chrY 57213880-57213964 - | HAVANA
#> chrY|57214350|57214397|- chrY 57214350-57214397 - | HAVANA
#> type bp_length phase
#> <factor> <numeric> <integer>
#> GL000009.2|56140|58376|- exon 2237 <NA>
#> GL000194.1|53594|54832|- exon 1239 <NA>
#> GL000194.1|55446|55676|- exon 231 <NA>
#> GL000194.1|53590|55676|- exon 2087 <NA>
#> GL000194.1|112792|112850|- exon 59 <NA>
#> ... ... ... ...
#> chrY|57212184|57213125|- exon 942 <NA>
#> chrY|57213204|57213357|- exon 154 <NA>
#> chrY|57213526|57213602|- exon 77 <NA>
#> chrY|57213880|57213964|- exon 85 <NA>
#> chrY|57214350|57214397|- exon 48 <NA>
#> gene_id transcript_id
#> <character> <character>
#> GL000009.2|56140|58376|- ENSG00000278704.1 ENST00000618686.1
#> GL000194.1|53594|54832|- ENSG00000274847.1 ENST00000400754.4
#> GL000194.1|55446|55676|- ENSG00000274847.1 ENST00000400754.4
#> GL000194.1|53590|55676|- ENSG00000277400.1 ENST00000613230.1
#> GL000194.1|112792|112850|- ENSG00000274847.1 ENST00000400754.4
#> ... ... ...
#> chrY|57212184|57213125|- ENSG00000227159.8_PA.. ENST00000507418.6_PA..
#> chrY|57213204|57213357|- ENSG00000227159.8_PA.. ENST00000507418.6_PA..
#> chrY|57213526|57213602|- ENSG00000227159.8_PA.. ENST00000507418.6_PA..
#> chrY|57213880|57213964|- ENSG00000227159.8_PA.. ENST00000507418.6_PA..
#> chrY|57214350|57214397|- ENSG00000227159.8_PA.. ENST00000507418.6_PA..
#> gene_type gene_name
#> <character> <character>
#> GL000009.2|56140|58376|- protein_coding BX004987.1
#> GL000194.1|53594|54832|- protein_coding AC145212.1
#> GL000194.1|55446|55676|- protein_coding AC145212.1
#> GL000194.1|53590|55676|- protein_coding AC145212.2
#> GL000194.1|112792|112850|- protein_coding AC145212.1
#> ... ... ...
#> chrY|57212184|57213125|- unprocessed_pseudogene DDX11L16
#> chrY|57213204|57213357|- unprocessed_pseudogene DDX11L16
#> chrY|57213526|57213602|- unprocessed_pseudogene DDX11L16
#> chrY|57213880|57213964|- unprocessed_pseudogene DDX11L16
#> chrY|57214350|57214397|- unprocessed_pseudogene DDX11L16
#> transcript_type transcript_name exon_number
#> <character> <character> <character>
#> GL000009.2|56140|58376|- protein_coding BX004987.1-201 1
#> GL000194.1|53594|54832|- protein_coding AC145212.1-201 4
#> GL000194.1|55446|55676|- protein_coding AC145212.1-201 3
#> GL000194.1|53590|55676|- protein_coding AC145212.2-201 3
#> GL000194.1|112792|112850|- protein_coding AC145212.1-201 2
#> ... ... ... ...
#> chrY|57212184|57213125|- unprocessed_pseudogene DDX11L16-001 5
#> chrY|57213204|57213357|- unprocessed_pseudogene DDX11L16-001 4
#> chrY|57213526|57213602|- unprocessed_pseudogene DDX11L16-001 3
#> chrY|57213880|57213964|- unprocessed_pseudogene DDX11L16-001 2
#> chrY|57214350|57214397|- unprocessed_pseudogene DDX11L16-001 1
#> exon_id level protein_id
#> <character> <character> <character>
#> GL000009.2|56140|58376|- ENSE00003753029.1 3 ENSP00000484918.1
#> GL000194.1|53594|54832|- ENSE00002218789.2 3 ENSP00000478910.1
#> GL000194.1|55446|55676|- ENSE00003714436.1 3 ENSP00000478910.1
#> GL000194.1|53590|55676|- ENSE00003723764.1 3 ENSP00000483280.1
#> GL000194.1|112792|112850|- ENSE00003713687.1 3 ENSP00000478910.1
#> ... ... ... ...
#> chrY|57212184|57213125|- ENSE00002023900.1 2 <NA>
#> chrY|57213204|57213357|- ENSE00002036959.1 2 <NA>
#> chrY|57213526|57213602|- ENSE00002021169.1 2 <NA>
#> chrY|57213880|57213964|- ENSE00002046926.1 2 <NA>
#> chrY|57214350|57214397|- ENSE00002072208.1 2 <NA>
#> transcript_support_level tag
#> <character> <character>
#> GL000009.2|56140|58376|- NA basic
#> GL000194.1|53594|54832|- 1 basic
#> GL000194.1|55446|55676|- 1 basic
#> GL000194.1|53590|55676|- 1 basic
#> GL000194.1|112792|112850|- 1 basic
#> ... ... ...
#> chrY|57212184|57213125|- NA PAR
#> chrY|57213204|57213357|- NA PAR
#> chrY|57213526|57213602|- NA PAR
#> chrY|57213880|57213964|- NA PAR
#> chrY|57214350|57214397|- NA PAR
#> recount_exon_id havana_gene
#> <character> <character>
#> GL000009.2|56140|58376|- GL000009.2|56140|583.. <NA>
#> GL000194.1|53594|54832|- GL000194.1|53594|548.. <NA>
#> GL000194.1|55446|55676|- GL000194.1|55446|556.. <NA>
#> GL000194.1|53590|55676|- GL000194.1|53590|556.. <NA>
#> GL000194.1|112792|112850|- GL000194.1|112792|11.. <NA>
#> ... ... ...
#> chrY|57212184|57213125|- chrY|57212184|572131.. OTTHUMG00000022678.1
#> chrY|57213204|57213357|- chrY|57213204|572133.. OTTHUMG00000022678.1
#> chrY|57213526|57213602|- chrY|57213526|572136.. OTTHUMG00000022678.1
#> chrY|57213880|57213964|- chrY|57213880|572139.. OTTHUMG00000022678.1
#> chrY|57214350|57214397|- chrY|57214350|572143.. OTTHUMG00000022678.1
#> havana_transcript ont ccdsid
#> <character> <character> <character>
#> GL000009.2|56140|58376|- <NA> <NA> <NA>
#> GL000194.1|53594|54832|- <NA> <NA> <NA>
#> GL000194.1|55446|55676|- <NA> <NA> <NA>
#> GL000194.1|53590|55676|- <NA> <NA> <NA>
#> GL000194.1|112792|112850|- <NA> <NA> <NA>
#> ... ... ... ...
#> chrY|57212184|57213125|- OTTHUMT00000058841.1 PGO:0000005 <NA>
#> chrY|57213204|57213357|- OTTHUMT00000058841.1 PGO:0000005 <NA>
#> chrY|57213526|57213602|- OTTHUMT00000058841.1 PGO:0000005 <NA>
#> chrY|57213880|57213964|- OTTHUMT00000058841.1 PGO:0000005 <NA>
#> chrY|57214350|57214397|- OTTHUMT00000058841.1 PGO:0000005 <NA>
#> -------
#> seqinfo: 374 sequences from an unspecified genome; no seqlengths
pryr::object_size(rse_exon_SRP009615)
#> 528.18 MB
## Create an RSE object at the exon-exon junction level
rse_jxn_SRP009615 <- create_rse(
proj_info,
type = "jxn"
)
#> 2023-05-07 00:11:52.826972 downloading and reading the metadata.
#> 2023-05-07 00:11:53.116512 caching file sra.sra.SRP009615.MD.gz.
#> 2023-05-07 00:11:53.423539 caching file sra.recount_project.SRP009615.MD.gz.
#> 2023-05-07 00:11:53.746409 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:54.072303 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2023-05-07 00:11:54.386513 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2023-05-07 00:11:54.471267 downloading and reading the feature information.
#> 2023-05-07 00:11:54.790385 caching file sra.junctions.SRP009615.ALL.RR.gz.
#> 2023-05-07 00:11:56.49752 downloading and reading the counts: 12 samples across 281448 features.
#> 2023-05-07 00:11:56.796275 caching file sra.junctions.SRP009615.ALL.MM.gz.
#> 2023-05-07 00:11:57.576621 matching exon-exon junction counts with the metadata.
#> 2023-05-07 00:11:57.838492 caching file sra.junctions.SRP009615.ALL.ID.gz.
#> 2023-05-07 00:11:57.941144 constructing the RangedSummarizedExperiment (rse) object.
## Explore the resulting RSE exon-exon junctions object
rse_jxn_SRP009615
#> class: RangedSummarizedExperiment
#> dim: 281448 12
#> metadata(9): time_created recount3_version ... jxn_format recount3_url
#> assays(1): counts
#> rownames(281448): chr1:11845-12009:+ chr1:12698-13220:+ ...
#> chrY:56848810-56851543:- chrY:56850515-56850921:+
#> rowData names(6): length annotated ... left_annotated right_annotated
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#> recount_pred.curated.cell_line BigWigURL
dim(rse_jxn_SRP009615)
#> [1] 281448 12
rowRanges(rse_jxn_SRP009615)
#> GRanges object with 281448 ranges and 6 metadata columns:
#> seqnames ranges strand | length
#> <Rle> <IRanges> <Rle> | <integer>
#> chr1:11845-12009:+ chr1 11845-12009 + | 165
#> chr1:12698-13220:+ chr1 12698-13220 + | 523
#> chr1:14696-185174:- chr1 14696-185174 - | 170479
#> chr1:14830-14969:- chr1 14830-14969 - | 140
#> chr1:14830-15020:- chr1 14830-15020 - | 191
#> ... ... ... ... . ...
#> chrY:56846131-56846553:+ chrY 56846131-56846553 + | 423
#> chrY:56846268-56846553:+ chrY 56846268-56846553 + | 286
#> chrY:56846486-56846553:+ chrY 56846486-56846553 + | 68
#> chrY:56848810-56851543:- chrY 56848810-56851543 - | 2734
#> chrY:56850515-56850921:+ chrY 56850515-56850921 + | 407
#> annotated left_motif right_motif
#> <integer> <character> <character>
#> chr1:11845-12009:+ 0 GT AG
#> chr1:12698-13220:+ 1 GT AG
#> chr1:14696-185174:- 0 CT AC
#> chr1:14830-14969:- 1 CT AC
#> chr1:14830-15020:- 0 CT AC
#> ... ... ... ...
#> chrY:56846131-56846553:+ 0 GT AG
#> chrY:56846268-56846553:+ 0 GT AG
#> chrY:56846486-56846553:+ 0 GT AG
#> chrY:56848810-56851543:- 0 CT AC
#> chrY:56850515-56850921:+ 0 GT AG
#> left_annotated right_annotated
#> <character> <character>
#> chr1:11845-12009:+ 0 aC19,sG19
#> chr1:12698-13220:+ aC19,gC19,gC24,gC25,.. aC19,cH38,gC19,gC24,..
#> chr1:14696-185174:- 0 0
#> chr1:14830-14969:- aC19,cH38,gC19,kG19,.. aC19,cH38,gC19,kG19,..
#> chr1:14830-15020:- aC19,cH38,gC19,kG19,.. 0
#> ... ... ...
#> chrY:56846131-56846553:+ 0 0
#> chrY:56846268-56846553:+ 0 0
#> chrY:56846486-56846553:+ 0 0
#> chrY:56848810-56851543:- 0 0
#> chrY:56850515-56850921:+ 0 0
#> -------
#> seqinfo: 97 sequences from an unspecified genome; no seqlengths
pryr::object_size(rse_jxn_SRP009615)
#> 60.30 MB
## Obtain a list of RSE objects for all exon annotations
if (FALSE) {
rses_exon <- lapply(annotation_options(), function(x) {
create_rse(proj_info, type = "exon", annotation = x, verbose = FALSE)
})
names(rses_exon) <- annotation_options()
}