Create a recount3 RangedSummarizedExperiment gene or exon object

Once you have identified a project you want to work with, you can use this function to construct a recount3 RangedSummarizedExperiment-class (RSE) object at the gene, exon, or exon-exon junction expression feature level. This function will retrieve the data, cache it, and then assemble the RSE object.

create_rse(
  project_info,
  type = c("gene", "exon", "jxn"),
  annotation = annotation_options(project_info$organism),
  bfc = recount3_cache(),
  jxn_format = c("ALL", "UNIQUE"),
  recount3_url = getOption("recount3_url", "http://duffel.rail.bio/recount3"),
  verbose = getOption("recount3_verbose", TRUE)
)

Arguments

project_info: A data.frame() with one row that contains the information for the project you are interested in. You can find which project to work on using available_projects().
type: A character(1) specifying whether you want to access gene, exon, or exon-exon junction counts.
annotation: A character(1) specifying which annotation you want to download. Only used when type is either gene or exon.
bfc: A BiocFileCache-class object where the files will be cached, typically created by recount3_cache().
jxn_format: A character(1) specifying whether the exon-exon junction files are derived from all the reads (ALL) or only from the uniquely mapping reads (UNIQUE). Note that UNIQUE is only available for some projects: GTEx and TCGA for human.
recount3_url: A character(1) specifying the home URL for recount3 or a local directory where you have mirrored recount3. Defaults to the load balancer http://duffel.rail.bio/recount3, but can also be https://recount-opendata.s3.amazonaws.com/recount3/release from https://registry.opendata.aws/recount/ or SciServer datascope from IDIES at JHU https://sciserver.org/public-data/recount3/data. You can set the R option recount3_url (for example in your .Rprofile) if you have a favorite mirror.
verbose: A logical(1) indicating whether to show messages with updates.

Value

A RangedSummarizedExperiment-class object.

Examples


## Find all available human projects
human_projects <- available_projects()
#> 2025-05-15 15:29:49.137943 caching file sra.recount_project.MD.gz.
#> 2025-05-15 15:29:49.578784 caching file gtex.recount_project.MD.gz.
#> 2025-05-15 15:29:50.077605 caching file tcga.recount_project.MD.gz.

## Find the project you are interested in
proj_info <- subset(
    human_projects,
    project == "SRP009615" & project_type == "data_sources"
)

## Create an RSE object at the gene level
rse_gene_SRP009615 <- create_rse(proj_info)
#> 2025-05-15 15:29:53.325543 downloading and reading the metadata.
#> 2025-05-15 15:29:53.722586 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:29:54.262244 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:29:54.752052 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:29:55.218563 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:29:55.656947 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:29:55.719583 downloading and reading the feature information.
#> 2025-05-15 15:29:56.13275 caching file human.gene_sums.G026.gtf.gz.
#> 2025-05-15 15:29:56.507005 downloading and reading the counts: 12 samples across 63856 features.
#> 2025-05-15 15:29:56.915007 caching file sra.gene_sums.SRP009615.G026.gz.
#> 2025-05-15 15:29:57.055035 constructing the RangedSummarizedExperiment (rse) object.

## Explore the resulting RSE gene object
rse_gene_SRP009615
#> class: RangedSummarizedExperiment 
#> dim: 63856 12 
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(63856): ENSG00000278704.1 ENSG00000277400.1 ...
#>   ENSG00000182484.15_PAR_Y ENSG00000227159.8_PAR_Y
#> rowData names(10): source type ... havana_gene tag
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#>   recount_pred.curated.cell_line BigWigURL

## Information about how this RSE object was made
metadata(rse_gene_SRP009615)
#> $time_created
#> [1] "2025-05-15 15:29:57 UTC"
#> 
#> $recount3_version
#>           package ondiskversion loadedversion                        path
#> recount3 recount3        1.19.2        1.19.2 /__w/_temp/Library/recount3
#>                           loadedpath attached is_base       date       source
#> recount3 /__w/_temp/Library/recount3     TRUE   FALSE 2025-05-15 Bioconductor
#>          md5ok            library
#> recount3    NA /__w/_temp/Library
#> 
#> $project
#> [1] "SRP009615"
#> 
#> $project_home
#> [1] "data_sources/sra"
#> 
#> $type
#> [1] "gene"
#> 
#> $organism
#> [1] "human"
#> 
#> $annotation
#> [1] "gencode_v26"
#> 
#> $recount3_url
#> [1] "http://duffel.rail.bio/recount3"
#> 

## Number of genes by number of samples
dim(rse_gene_SRP009615)
#> [1] 63856    12

## Information about the genes
rowRanges(rse_gene_SRP009615)
#> GRanges object with 63856 ranges and 10 metadata columns:
#>                              seqnames            ranges strand |   source
#>                                 <Rle>         <IRanges>  <Rle> | <factor>
#>          ENSG00000278704.1 GL000009.2       56140-58376      - |  ENSEMBL
#>          ENSG00000277400.1 GL000194.1      53590-115018      - |  ENSEMBL
#>          ENSG00000274847.1 GL000194.1      53594-115055      - |  ENSEMBL
#>          ENSG00000277428.1 GL000195.1       37434-37534      - |  ENSEMBL
#>          ENSG00000276256.1 GL000195.1       42939-49164      - |  ENSEMBL
#>                        ...        ...               ...    ... .      ...
#>   ENSG00000124334.17_PAR_Y       chrY 57184101-57197337      + |   HAVANA
#>   ENSG00000185203.12_PAR_Y       chrY 57201143-57203357      - |   HAVANA
#>    ENSG00000270726.6_PAR_Y       chrY 57190738-57208756      + |   HAVANA
#>   ENSG00000182484.15_PAR_Y       chrY 57207346-57212230      + |   HAVANA
#>    ENSG00000227159.8_PAR_Y       chrY 57212184-57214397      - |   HAVANA
#>                                type bp_length     phase                gene_id
#>                            <factor> <numeric> <integer>            <character>
#>          ENSG00000278704.1     gene      2237      <NA>      ENSG00000278704.1
#>          ENSG00000277400.1     gene      2179      <NA>      ENSG00000277400.1
#>          ENSG00000274847.1     gene      1599      <NA>      ENSG00000274847.1
#>          ENSG00000277428.1     gene       101      <NA>      ENSG00000277428.1
#>          ENSG00000276256.1     gene      2195      <NA>      ENSG00000276256.1
#>                        ...      ...       ...       ...                    ...
#>   ENSG00000124334.17_PAR_Y     gene      2504      <NA> ENSG00000124334.17_P..
#>   ENSG00000185203.12_PAR_Y     gene      1054      <NA> ENSG00000185203.12_P..
#>    ENSG00000270726.6_PAR_Y     gene       773      <NA> ENSG00000270726.6_PA..
#>   ENSG00000182484.15_PAR_Y     gene      4618      <NA> ENSG00000182484.15_P..
#>    ENSG00000227159.8_PAR_Y     gene      1306      <NA> ENSG00000227159.8_PA..
#>                                         gene_type   gene_name       level
#>                                       <character> <character> <character>
#>          ENSG00000278704.1         protein_coding  BX004987.1           3
#>          ENSG00000277400.1         protein_coding  AC145212.2           3
#>          ENSG00000274847.1         protein_coding  AC145212.1           3
#>          ENSG00000277428.1               misc_RNA       Y_RNA           3
#>          ENSG00000276256.1         protein_coding  AC011043.1           3
#>                        ...                    ...         ...         ...
#>   ENSG00000124334.17_PAR_Y         protein_coding        IL9R           2
#>   ENSG00000185203.12_PAR_Y              antisense      WASIR1           2
#>    ENSG00000270726.6_PAR_Y   processed_transcript AJ271736.10           2
#>   ENSG00000182484.15_PAR_Y transcribed_unproces..      WASH6P           2
#>    ENSG00000227159.8_PAR_Y unprocessed_pseudogene    DDX11L16           2
#>                                     havana_gene         tag
#>                                     <character> <character>
#>          ENSG00000278704.1                 <NA>        <NA>
#>          ENSG00000277400.1                 <NA>        <NA>
#>          ENSG00000274847.1                 <NA>        <NA>
#>          ENSG00000277428.1                 <NA>        <NA>
#>          ENSG00000276256.1                 <NA>        <NA>
#>                        ...                  ...         ...
#>   ENSG00000124334.17_PAR_Y OTTHUMG00000022720.1         PAR
#>   ENSG00000185203.12_PAR_Y OTTHUMG00000022676.3         PAR
#>    ENSG00000270726.6_PAR_Y OTTHUMG00000184987.2         PAR
#>   ENSG00000182484.15_PAR_Y OTTHUMG00000022677.5         PAR
#>    ENSG00000227159.8_PAR_Y OTTHUMG00000022678.1         PAR
#>   -------
#>   seqinfo: 374 sequences from an unspecified genome; no seqlengths

## Sample metadata
colnames(colData(rse_gene_SRP009615))
#>   [1] "rail_id"                                                           
#>   [2] "external_id"                                                       
#>   [3] "study"                                                             
#>   [4] "sra.sample_acc.x"                                                  
#>   [5] "sra.experiment_acc"                                                
#>   [6] "sra.submission_acc"                                                
#>   [7] "sra.submission_center"                                             
#>   [8] "sra.submission_lab"                                                
#>   [9] "sra.study_title"                                                   
#>  [10] "sra.study_abstract"                                                
#>  [11] "sra.study_description"                                             
#>  [12] "sra.experiment_title"                                              
#>  [13] "sra.design_description"                                            
#>  [14] "sra.sample_description"                                            
#>  [15] "sra.library_name"                                                  
#>  [16] "sra.library_strategy"                                              
#>  [17] "sra.library_source"                                                
#>  [18] "sra.library_selection"                                             
#>  [19] "sra.library_layout"                                                
#>  [20] "sra.paired_nominal_length"                                         
#>  [21] "sra.paired_nominal_stdev"                                          
#>  [22] "sra.library_construction_protocol"                                 
#>  [23] "sra.platform_model"                                                
#>  [24] "sra.sample_attributes"                                             
#>  [25] "sra.experiment_attributes"                                         
#>  [26] "sra.spot_length"                                                   
#>  [27] "sra.sample_name"                                                   
#>  [28] "sra.sample_title"                                                  
#>  [29] "sra.sample_bases"                                                  
#>  [30] "sra.sample_spots"                                                  
#>  [31] "sra.run_published"                                                 
#>  [32] "sra.size"                                                          
#>  [33] "sra.run_total_bases"                                               
#>  [34] "sra.run_total_spots"                                               
#>  [35] "sra.num_reads"                                                     
#>  [36] "sra.num_spots"                                                     
#>  [37] "sra.read_info"                                                     
#>  [38] "sra.run_alias"                                                     
#>  [39] "sra.run_center_name"                                               
#>  [40] "sra.run_broker_name"                                               
#>  [41] "sra.run_center"                                                    
#>  [42] "recount_project.project"                                           
#>  [43] "recount_project.organism"                                          
#>  [44] "recount_project.file_source"                                       
#>  [45] "recount_project.metadata_source"                                   
#>  [46] "recount_project.date_processed"                                    
#>  [47] "recount_qc.aligned_reads%.chrm"                                    
#>  [48] "recount_qc.aligned_reads%.chrx"                                    
#>  [49] "recount_qc.aligned_reads%.chry"                                    
#>  [50] "recount_qc.bc_auc.all_reads_all_bases"                             
#>  [51] "recount_qc.bc_auc.all_reads_annotated_bases"                       
#>  [52] "recount_qc.bc_auc.unique_reads_all_bases"                          
#>  [53] "recount_qc.bc_auc.unique_reads_annotated_bases"                    
#>  [54] "recount_qc.bc_auc.all_%"                                           
#>  [55] "recount_qc.bc_auc.unique_%"                                        
#>  [56] "recount_qc.bc_frag.count"                                          
#>  [57] "recount_qc.bc_frag.kallisto_count"                                 
#>  [58] "recount_qc.bc_frag.kallisto_mean_length"                           
#>  [59] "recount_qc.bc_frag.mean_length"                                    
#>  [60] "recount_qc.bc_frag.mode_length"                                    
#>  [61] "recount_qc.bc_frag.mode_length_count"                              
#>  [62] "recount_qc.exon_fc.all_%"                                          
#>  [63] "recount_qc.exon_fc.unique_%"                                       
#>  [64] "recount_qc.exon_fc_count_all.total"                                
#>  [65] "recount_qc.exon_fc_count_all.assigned"                             
#>  [66] "recount_qc.exon_fc_count_unique.total"                             
#>  [67] "recount_qc.exon_fc_count_unique.assigned"                          
#>  [68] "recount_qc.gene_fc.all_%"                                          
#>  [69] "recount_qc.gene_fc.unique_%"                                       
#>  [70] "recount_qc.gene_fc_count_all.total"                                
#>  [71] "recount_qc.gene_fc_count_all.assigned"                             
#>  [72] "recount_qc.gene_fc_count_unique.total"                             
#>  [73] "recount_qc.gene_fc_count_unique.assigned"                          
#>  [74] "recount_qc.intron_sum"                                             
#>  [75] "recount_qc.intron_sum_%"                                           
#>  [76] "recount_qc.star.%_of_chimeric_reads"                               
#>  [77] "recount_qc.star.%_of_chimeric_reads2"                              
#>  [78] "recount_qc.star.%_of_reads_mapped_to_multiple_loci"                
#>  [79] "recount_qc.star.%_of_reads_mapped_to_multiple_loci2"               
#>  [80] "recount_qc.star.%_of_reads_mapped_to_too_many_loci"                
#>  [81] "recount_qc.star.%_of_reads_mapped_to_too_many_loci2"               
#>  [82] "recount_qc.star.%_of_reads_unmapped:_other"                        
#>  [83] "recount_qc.star.%_of_reads_unmapped:_other2"                       
#>  [84] "recount_qc.star.%_of_reads_unmapped:_too_many_mismatches"          
#>  [85] "recount_qc.star.%_of_reads_unmapped:_too_many_mismatches2"         
#>  [86] "recount_qc.star.%_of_reads_unmapped:_too_short"                    
#>  [87] "recount_qc.star.%_of_reads_unmapped:_too_short2"                   
#>  [88] "recount_qc.star.all_mapped_reads"                                  
#>  [89] "recount_qc.star.all_mapped_reads2"                                 
#>  [90] "recount_qc.star.average_input_read_length"                         
#>  [91] "recount_qc.star.average_input_read_length2"                        
#>  [92] "recount_qc.star.average_mapped_length"                             
#>  [93] "recount_qc.star.average_mapped_length2"                            
#>  [94] "recount_qc.star.deletion_average_length"                           
#>  [95] "recount_qc.star.deletion_average_length2"                          
#>  [96] "recount_qc.star.deletion_rate_per_base"                            
#>  [97] "recount_qc.star.deletion_rate_per_base2"                           
#>  [98] "recount_qc.star.insertion_average_length"                          
#>  [99] "recount_qc.star.insertion_average_length2"                         
#> [100] "recount_qc.star.insertion_rate_per_base"                           
#> [101] "recount_qc.star.insertion_rate_per_base2"                          
#> [102] "recount_qc.star.mapping_speed,_million_of_reads_per_hour"          
#> [103] "recount_qc.star.mapping_speed,_million_of_reads_per_hour2"         
#> [104] "recount_qc.star.mismatch_rate_per_base,_%"                         
#> [105] "recount_qc.star.mismatch_rate_per_base,_%2"                        
#> [106] "recount_qc.star.number_of_chimeric_reads"                          
#> [107] "recount_qc.star.number_of_chimeric_reads2"                         
#> [108] "recount_qc.star.number_of_input_reads"                             
#> [109] "recount_qc.star.number_of_input_reads2"                            
#> [110] "recount_qc.star.number_of_reads_mapped_to_multiple_loci"           
#> [111] "recount_qc.star.number_of_reads_mapped_to_multiple_loci2"          
#> [112] "recount_qc.star.number_of_reads_mapped_to_too_many_loci"           
#> [113] "recount_qc.star.number_of_reads_mapped_to_too_many_loci2"          
#> [114] "recount_qc.star.number_of_reads_unmapped:_other"                   
#> [115] "recount_qc.star.number_of_reads_unmapped:_other2"                  
#> [116] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches"     
#> [117] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches2"    
#> [118] "recount_qc.star.number_of_reads_unmapped:_too_short"               
#> [119] "recount_qc.star.number_of_reads_unmapped:_too_short2"              
#> [120] "recount_qc.star.number_of_splices:_at/ac"                          
#> [121] "recount_qc.star.number_of_splices:_at/ac2"                         
#> [122] "recount_qc.star.number_of_splices:_annotated_(sjdb)"               
#> [123] "recount_qc.star.number_of_splices:_annotated_(sjdb)2"              
#> [124] "recount_qc.star.number_of_splices:_gc/ag"                          
#> [125] "recount_qc.star.number_of_splices:_gc/ag2"                         
#> [126] "recount_qc.star.number_of_splices:_gt/ag"                          
#> [127] "recount_qc.star.number_of_splices:_gt/ag2"                         
#> [128] "recount_qc.star.number_of_splices:_non-canonical"                  
#> [129] "recount_qc.star.number_of_splices:_non-canonical2"                 
#> [130] "recount_qc.star.number_of_splices:_total"                          
#> [131] "recount_qc.star.number_of_splices:_total2"                         
#> [132] "recount_qc.star.uniquely_mapped_reads_%"                           
#> [133] "recount_qc.star.uniquely_mapped_reads_%2"                          
#> [134] "recount_qc.star.uniquely_mapped_reads_number"                      
#> [135] "recount_qc.star.uniquely_mapped_reads_number2"                     
#> [136] "recount_qc.junction_count"                                         
#> [137] "recount_qc.junction_coverage"                                      
#> [138] "recount_qc.junction_avg_coverage"                                  
#> [139] "recount_qc.star.number_of_input_reads_both"                        
#> [140] "recount_qc.star.all_mapped_reads_both"                             
#> [141] "recount_qc.star.number_of_chimeric_reads_both"                     
#> [142] "recount_qc.star.number_of_reads_mapped_to_multiple_loci_both"      
#> [143] "recount_qc.star.number_of_reads_mapped_to_too_many_loci_both"      
#> [144] "recount_qc.star.number_of_reads_unmapped:_other_both"              
#> [145] "recount_qc.star.number_of_reads_unmapped:_too_many_mismatches_both"
#> [146] "recount_qc.star.number_of_reads_unmapped:_too_short_both"          
#> [147] "recount_qc.star.uniquely_mapped_reads_number_both"                 
#> [148] "recount_qc.star.%_mapped_reads_both"                               
#> [149] "recount_qc.star.%_chimeric_reads_both"                             
#> [150] "recount_qc.star.%_reads_mapped_to_multiple_loci_both"              
#> [151] "recount_qc.star.%_reads_mapped_to_too_many_loci_both"              
#> [152] "recount_qc.star.%_reads_unmapped:_other_both"                      
#> [153] "recount_qc.star.%_reads_unmapped:_too_many_mismatches_both"        
#> [154] "recount_qc.star.%_reads_unmapped:_too_short_both"                  
#> [155] "recount_qc.star.uniquely_mapped_reads_%_both"                      
#> [156] "recount_seq_qc.min_len"                                            
#> [157] "recount_seq_qc.max_len"                                            
#> [158] "recount_seq_qc.avg_len"                                            
#> [159] "recount_seq_qc.#distinct_quality_values"                           
#> [160] "recount_seq_qc.#bases"                                             
#> [161] "recount_seq_qc.%a"                                                 
#> [162] "recount_seq_qc.%c"                                                 
#> [163] "recount_seq_qc.%g"                                                 
#> [164] "recount_seq_qc.%t"                                                 
#> [165] "recount_seq_qc.%n"                                                 
#> [166] "recount_seq_qc.avgq"                                               
#> [167] "recount_seq_qc.errq"                                               
#> [168] "recount_pred.sample_acc.y"                                         
#> [169] "recount_pred.curated.type"                                         
#> [170] "recount_pred.curated.tissue"                                       
#> [171] "recount_pred.pattern.predict.type"                                 
#> [172] "recount_pred.pred.type"                                            
#> [173] "recount_pred.curated.cell_type"                                    
#> [174] "recount_pred.curated.cell_line"                                    
#> [175] "BigWigURL"                                                         

## Check how much memory this RSE object uses
pryr::object_size(rse_gene_SRP009615)
#> 24.81 MB

## Create an RSE object using gencode_v29 instead of gencode_v26
rse_gene_SRP009615_gencode_v29 <- create_rse(
    proj_info,
    annotation = "gencode_v29",
    verbose = FALSE
)
rowRanges(rse_gene_SRP009615_gencode_v29)
#> GRanges object with 64837 ranges and 10 metadata columns:
#>                              seqnames            ranges strand |   source
#>                                 <Rle>         <IRanges>  <Rle> | <factor>
#>          ENSG00000278704.1 GL000009.2       56140-58376      - |  ENSEMBL
#>          ENSG00000277400.1 GL000194.1      53590-115018      - |  ENSEMBL
#>          ENSG00000274847.1 GL000194.1      53594-115055      - |  ENSEMBL
#>          ENSG00000277428.1 GL000195.1       37434-37534      - |  ENSEMBL
#>          ENSG00000276256.1 GL000195.1       42939-49164      - |  ENSEMBL
#>                        ...        ...               ...    ... .      ...
#>   ENSG00000124334.17_PAR_Y       chrY 57184101-57197337      + |   HAVANA
#>   ENSG00000185203.12_PAR_Y       chrY 57201143-57203357      - |   HAVANA
#>    ENSG00000270726.6_PAR_Y       chrY 57190738-57208756      + |   HAVANA
#>   ENSG00000182484.15_PAR_Y       chrY 57207346-57212230      + |   HAVANA
#>    ENSG00000227159.8_PAR_Y       chrY 57212184-57214397      - |   HAVANA
#>                                type bp_length     phase                gene_id
#>                            <factor> <numeric> <integer>            <character>
#>          ENSG00000278704.1     gene      2237      <NA>      ENSG00000278704.1
#>          ENSG00000277400.1     gene      2179      <NA>      ENSG00000277400.1
#>          ENSG00000274847.1     gene      1599      <NA>      ENSG00000274847.1
#>          ENSG00000277428.1     gene       101      <NA>      ENSG00000277428.1
#>          ENSG00000276256.1     gene      2195      <NA>      ENSG00000276256.1
#>                        ...      ...       ...       ...                    ...
#>   ENSG00000124334.17_PAR_Y     gene      2504      <NA> ENSG00000124334.17_P..
#>   ENSG00000185203.12_PAR_Y     gene      1054      <NA> ENSG00000185203.12_P..
#>    ENSG00000270726.6_PAR_Y     gene       773      <NA> ENSG00000270726.6_PA..
#>   ENSG00000182484.15_PAR_Y     gene      4618      <NA> ENSG00000182484.15_P..
#>    ENSG00000227159.8_PAR_Y     gene      1306      <NA> ENSG00000227159.8_PA..
#>                                         gene_type   gene_name       level
#>                                       <character> <character> <character>
#>          ENSG00000278704.1         protein_coding  BX004987.1           3
#>          ENSG00000277400.1         protein_coding  AC145212.1           3
#>          ENSG00000274847.1         protein_coding       MAFIP           3
#>          ENSG00000277428.1               misc_RNA     RF00019           3
#>          ENSG00000276256.1         protein_coding  AC011043.1           3
#>                        ...                    ...         ...         ...
#>   ENSG00000124334.17_PAR_Y         protein_coding        IL9R           2
#>   ENSG00000185203.12_PAR_Y              antisense      WASIR1           2
#>    ENSG00000270726.6_PAR_Y   processed_transcript  AJ271736.1           2
#>   ENSG00000182484.15_PAR_Y transcribed_unproces..      WASH6P           2
#>    ENSG00000227159.8_PAR_Y unprocessed_pseudogene    DDX11L16           2
#>                                     havana_gene         tag
#>                                     <character> <character>
#>          ENSG00000278704.1                 <NA>        <NA>
#>          ENSG00000277400.1                 <NA>        <NA>
#>          ENSG00000274847.1                 <NA>        <NA>
#>          ENSG00000277428.1                 <NA>        <NA>
#>          ENSG00000276256.1                 <NA>        <NA>
#>                        ...                  ...         ...
#>   ENSG00000124334.17_PAR_Y OTTHUMG00000022720.1         PAR
#>   ENSG00000185203.12_PAR_Y OTTHUMG00000022676.3         PAR
#>    ENSG00000270726.6_PAR_Y OTTHUMG00000184987.2         PAR
#>   ENSG00000182484.15_PAR_Y OTTHUMG00000022677.5         PAR
#>    ENSG00000227159.8_PAR_Y OTTHUMG00000022678.1         PAR
#>   -------
#>   seqinfo: 406 sequences from an unspecified genome; no seqlengths

## Create an RSE object using FANTOM6_CAT instead of gencode_v26
rse_gene_SRP009615_fantom6_cat <- create_rse(
    proj_info,
    annotation = "fantom6_cat"
)
#> 2025-05-15 15:30:01.177884 downloading and reading the metadata.
#> 2025-05-15 15:30:01.575588 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:02.081688 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:02.578025 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:03.051783 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:03.512261 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:03.579052 downloading and reading the feature information.
#> 2025-05-15 15:30:03.999482 caching file human.gene_sums.F006.gtf.gz.
#> 2025-05-15 15:30:05.265381 downloading and reading the counts: 12 samples across 124047 features.
#> 2025-05-15 15:30:05.685928 caching file sra.gene_sums.SRP009615.F006.gz.
#> 2025-05-15 15:30:05.850711 constructing the RangedSummarizedExperiment (rse) object.
rowRanges(rse_gene_SRP009615_fantom6_cat)
#> GRanges object with 124047 ranges and 5 metadata columns:
#>                   seqnames            ranges strand |   source     type
#>                      <Rle>         <IRanges>  <Rle> | <factor> <factor>
#>   CATG00000042730     chr1     159537-162485      - |  FANTOM6     gene
#>   CATG00000042731     chr1     273882-274416      - |  FANTOM6     gene
#>   ENSG00000223659     chr1     627377-629095      - |  FANTOM6     gene
#>   ENSG00000225630     chr1     630001-630683      + |  FANTOM6     gene
#>   ENSG00000225972     chr1     629209-631743      + |  FANTOM6     gene
#>               ...      ...               ...    ... .      ...      ...
#>   CATG00000114975     chrY 56836712-56851323      + |  FANTOM6     gene
#>   CATG00000115126     chrY 56855793-56856102      - |  FANTOM6     gene
#>   CATG00000114976     chrY 56855491-56858320      + |  FANTOM6     gene
#>   CATG00000114977     chrY 56867675-56882339      + |  FANTOM6     gene
#>   CATG00000115127     chrY 56884759-56885317      - |  FANTOM6     gene
#>                   bp_length     phase         gene_id
#>                   <numeric> <integer>     <character>
#>   CATG00000042730      2949      <NA> CATG00000042730
#>   CATG00000042731       535      <NA> CATG00000042731
#>   ENSG00000223659       887      <NA> ENSG00000223659
#>   ENSG00000225630       683      <NA> ENSG00000225630
#>   ENSG00000225972       438      <NA> ENSG00000225972
#>               ...       ...       ...             ...
#>   CATG00000114975      4376      <NA> CATG00000114975
#>   CATG00000115126       310      <NA> CATG00000115126
#>   CATG00000114976      2830      <NA> CATG00000114976
#>   CATG00000114977     14665      <NA> CATG00000114977
#>   CATG00000115127       559      <NA> CATG00000115127
#>   -------
#>   seqinfo: 25 sequences from an unspecified genome; no seqlengths

## Create an RSE object using RefSeq instead of gencode_v26
rse_gene_SRP009615_refseq <- create_rse(
    proj_info,
    annotation = "refseq"
)
#> 2025-05-15 15:30:05.881552 downloading and reading the metadata.
#> 2025-05-15 15:30:06.348772 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:06.851717 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:07.428953 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:07.886295 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:08.344836 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:08.409713 downloading and reading the feature information.
#> 2025-05-15 15:30:08.877405 caching file human.gene_sums.R109.gtf.gz.
#> 2025-05-15 15:30:09.283159 downloading and reading the counts: 12 samples across 54042 features.
#> 2025-05-15 15:30:09.677199 caching file sra.gene_sums.SRP009615.R109.gz.
#> 2025-05-15 15:30:09.868549 constructing the RangedSummarizedExperiment (rse) object.
rowRanges(rse_gene_SRP009615_refseq)
#> GRanges object with 54042 ranges and 25 metadata columns:
#>               seqnames            ranges strand |   source     type bp_length
#>                  <Rle>         <IRanges>  <Rle> | <factor> <factor> <numeric>
#>   gene14440 GL000008.2     124376-125329      - |   RefSeq     gene       954
#>   gene14441 GL000008.2     153090-153485      - |   RefSeq     gene       396
#>   gene14442 GL000008.2     161987-178171      - |   RefSeq     gene      1744
#>   gene38360 GL000009.2       32290-36345      + |   RefSeq     gene      1512
#>   gene38362 GL000009.2       43342-56258      + |   RefSeq     gene       972
#>         ...        ...               ...    ... .      ...      ...       ...
#>   gene54245       chrY 57171874-57172771      - |   RefSeq     gene       898
#>   gene54246       chrY 57184101-57199537      + |   RefSeq     gene      6193
#>   gene54247       chrY 57201084-57203350      - |   RefSeq     gene      1231
#>   gene54248       chrY 57208178-57212192      + |   RefSeq     gene      1570
#>   gene54249       chrY 57212178-57214703      - |   RefSeq     gene      1643
#>                 phase     gene_id                 Dbxref        Name
#>             <integer> <character>            <character> <character>
#>   gene14440      <NA>   gene14440 GeneID:100419019,HGN..    SNX18P15
#>   gene14441      <NA>   gene14441 GeneID:100419020,HGN..    SNX18P16
#>   gene14442      <NA>   gene14442 GeneID:100874392,Gen.. NR_046228.1
#>   gene38360      <NA>   gene38360 GeneID:100533720,HGN.. ANKRD20A15P
#>   gene38362      <NA>   gene38362 GeneID:105379272,Gen.. XR_949087.2
#>         ...       ...         ...                    ...         ...
#>   gene54245      <NA>   gene54245 GeneID:644218,HGNC:H..      TRPC6P
#>   gene54246      <NA>   gene54246                   <NA>        <NA>
#>   gene54247      <NA>   gene54247 GeneID:100128260,Gen.. NR_138048.1
#>   gene54248      <NA>   gene54248 GeneID:653440,HGNC:H..      WASH6P
#>   gene54249      <NA>   gene54249 GeneID:727856,Genban.. NR_110561.1
#>                        description       gbkey gene_biotype    gene_name
#>                        <character> <character>  <character>  <character>
#>   gene14440 sorting nexin 18 pse..        Gene   pseudogene     SNX18P15
#>   gene14441 sorting nexin 18 pse..        Gene   pseudogene     SNX18P16
#>   gene14442                   <NA>    misc_RNA         <NA>  ANKRD20A12P
#>   gene38360 ankyrin repeat domai..        Gene   pseudogene  ANKRD20A15P
#>   gene38362                   <NA>       ncRNA         <NA> LOC105379272
#>         ...                    ...         ...          ...          ...
#>   gene54245 transient receptor p..        Gene   pseudogene       TRPC6P
#>   gene54246                   <NA>        mRNA         <NA>         IL9R
#>   gene54247                   <NA>       ncRNA         <NA>       WASIR1
#>   gene54248 WAS protein family h..        Gene   pseudogene       WASH6P
#>   gene54249                   <NA>    misc_RNA         <NA>     DDX11L16
#>                  pseudo                product         model_evidence
#>             <character>            <character>            <character>
#>   gene14440        true                   <NA>                   <NA>
#>   gene14441        true                   <NA>                   <NA>
#>   gene14442        <NA> ankyrin repeat domai..                   <NA>
#>   gene38360        true                   <NA>                   <NA>
#>   gene38362        <NA> uncharacterized LOC1.. Supporting evidence ..
#>         ...         ...                    ...                    ...
#>   gene54245        true                   <NA>                   <NA>
#>   gene54246        <NA>                   <NA>                   <NA>
#>   gene54247        <NA> WASH and IL9R antise..                   <NA>
#>   gene54248        true                   <NA>                   <NA>
#>   gene54249        <NA> DEAD/H-box helicase ..                   <NA>
#>               exception     partial        Note   inference      geneID
#>             <character> <character> <character> <character> <character>
#>   gene14440        <NA>        <NA>        <NA>        <NA>        <NA>
#>   gene14441        <NA>        <NA>        <NA>        <NA>        <NA>
#>   gene14442        <NA>        <NA>        <NA>        <NA>        <NA>
#>   gene38360        <NA>        <NA>        <NA>        <NA>        <NA>
#>   gene38362        <NA>        <NA>        <NA>        <NA>        <NA>
#>         ...         ...         ...         ...         ...         ...
#>   gene54245        <NA>        <NA>        <NA>        <NA>        <NA>
#>   gene54246        <NA>        <NA>        <NA>        <NA>        <NA>
#>   gene54247        <NA>        <NA>        <NA>        <NA>        <NA>
#>   gene54248        <NA>        <NA>        <NA>        <NA>        <NA>
#>   gene54249        <NA>        <NA>        <NA>        <NA>        <NA>
#>               anticodon        gene_synonym   end_range start_range
#>             <character>         <character> <character> <character>
#>   gene14440        <NA>                <NA>        <NA>        <NA>
#>   gene14441        <NA>                <NA>        <NA>        <NA>
#>   gene14442        <NA>                <NA>        <NA>        <NA>
#>   gene38360        <NA>                <NA>        <NA>        <NA>
#>   gene38362        <NA>                <NA>        <NA>        <NA>
#>         ...         ...                 ...         ...         ...
#>   gene54245        <NA>              TRPC6L        <NA>        <NA>
#>   gene54246        <NA>                <NA>        <NA>        <NA>
#>   gene54247        <NA>                <NA>        <NA>        <NA>
#>   gene54248        <NA> CXYorf1,FAM39A,WASH        <NA>        <NA>
#>   gene54249        <NA>                <NA>        <NA>        <NA>
#>             standard_name      codons
#>               <character> <character>
#>   gene14440          <NA>        <NA>
#>   gene14441          <NA>        <NA>
#>   gene14442          <NA>        <NA>
#>   gene38360          <NA>        <NA>
#>   gene38362          <NA>        <NA>
#>         ...           ...         ...
#>   gene54245          <NA>        <NA>
#>   gene54246          <NA>        <NA>
#>   gene54247          <NA>        <NA>
#>   gene54248          <NA>        <NA>
#>   gene54249          <NA>        <NA>
#>   -------
#>   seqinfo: 436 sequences from an unspecified genome; no seqlengths

## Create an RSE object using ERCC instead of gencode_v26
rse_gene_SRP009615_ercc <- create_rse(
    proj_info,
    annotation = "ercc"
)
#> 2025-05-15 15:30:09.899231 downloading and reading the metadata.
#> 2025-05-15 15:30:10.322225 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:10.839303 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:11.415692 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:11.869738 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:12.308722 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:12.371791 downloading and reading the feature information.
#> 2025-05-15 15:30:12.7351 caching file human.gene_sums.ERCC.gtf.gz.
#> 2025-05-15 15:30:12.799005 downloading and reading the counts: 12 samples across 92 features.
#> 2025-05-15 15:30:13.200642 caching file sra.gene_sums.SRP009615.ERCC.gz.
#> 2025-05-15 15:30:13.288506 constructing the RangedSummarizedExperiment (rse) object.
rowRanges(rse_gene_SRP009615_ercc)
#> GRanges object with 92 ranges and 6 metadata columns:
#>                seqnames    ranges strand |   source     type bp_length
#>                   <Rle> <IRanges>  <Rle> | <factor> <factor> <numeric>
#>   ERCC-00002 ERCC-00002    1-1061      + |     ERCC     gene      1061
#>   ERCC-00003 ERCC-00003    1-1023      + |     ERCC     gene      1023
#>   ERCC-00004 ERCC-00004     1-523      + |     ERCC     gene       523
#>   ERCC-00009 ERCC-00009     1-984      + |     ERCC     gene       984
#>   ERCC-00012 ERCC-00012     1-994      + |     ERCC     gene       994
#>          ...        ...       ...    ... .      ...      ...       ...
#>   ERCC-00164 ERCC-00164    1-1022      + |     ERCC     gene      1022
#>   ERCC-00165 ERCC-00165     1-872      + |     ERCC     gene       872
#>   ERCC-00168 ERCC-00168    1-1024      + |     ERCC     gene      1024
#>   ERCC-00170 ERCC-00170    1-1023      + |     ERCC     gene      1023
#>   ERCC-00171 ERCC-00171     1-505      + |     ERCC     gene       505
#>                  phase     gene_id transcript_id
#>              <integer> <character>   <character>
#>   ERCC-00002      <NA>  ERCC-00002      DQ459430
#>   ERCC-00003      <NA>  ERCC-00003      DQ516784
#>   ERCC-00004      <NA>  ERCC-00004      DQ516752
#>   ERCC-00009      <NA>  ERCC-00009      DQ668364
#>   ERCC-00012      <NA>  ERCC-00012      DQ883670
#>          ...       ...         ...           ...
#>   ERCC-00164      <NA>  ERCC-00164      DQ516779
#>   ERCC-00165      <NA>  ERCC-00165      DQ668363
#>   ERCC-00168      <NA>  ERCC-00168      DQ516776
#>   ERCC-00170      <NA>  ERCC-00170      DQ516773
#>   ERCC-00171      <NA>  ERCC-00171      DQ854994
#>   -------
#>   seqinfo: 92 sequences from an unspecified genome; no seqlengths

## Create an RSE object using SIRV instead of gencode_v26
rse_gene_SRP009615_sirv <- create_rse(
    proj_info,
    annotation = "sirv"
)
#> 2025-05-15 15:30:13.316385 downloading and reading the metadata.
#> 2025-05-15 15:30:13.696844 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:14.176732 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:14.706131 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:15.15968 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:15.650716 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:15.723094 downloading and reading the feature information.
#> 2025-05-15 15:30:16.219256 caching file human.gene_sums.SIRV.gtf.gz.
#> 2025-05-15 15:30:16.289654 downloading and reading the counts: 12 samples across 7 features.
#> 2025-05-15 15:30:16.747496 caching file sra.gene_sums.SRP009615.SIRV.gz.
#> 2025-05-15 15:30:16.844564 constructing the RangedSummarizedExperiment (rse) object.
rowRanges(rse_gene_SRP009615_sirv)
#> GRanges object with 7 ranges and 5 metadata columns:
#>         seqnames      ranges strand |          source     type bp_length
#>            <Rle>   <IRanges>  <Rle> |        <factor> <factor> <numeric>
#>   SIRV1    SIRV1  1001-11643      * | LexogenSIRVData     gene        NA
#>   SIRV2    SIRV2   1001-5911      * | LexogenSIRVData     gene        NA
#>   SIRV3    SIRV3   1001-9943      * | LexogenSIRVData     gene        NA
#>   SIRV4    SIRV4  1001-15122      * | LexogenSIRVData     gene        NA
#>   SIRV5    SIRV5  1001-13606      * | LexogenSIRVData     gene        NA
#>   SIRV6    SIRV6  1001-11837      * | LexogenSIRVData     gene        NA
#>   SIRV7    SIRV7 1001-147957      * | LexogenSIRVData     gene        NA
#>             phase     gene_id
#>         <integer> <character>
#>   SIRV1         0       SIRV1
#>   SIRV2         0       SIRV2
#>   SIRV3         0       SIRV3
#>   SIRV4         0       SIRV4
#>   SIRV5         0       SIRV5
#>   SIRV6         0       SIRV6
#>   SIRV7         0       SIRV7
#>   -------
#>   seqinfo: 7 sequences from an unspecified genome; no seqlengths

## Obtain a list of RSE objects for all gene annotations
rses_gene <- lapply(annotation_options(), function(x) {
    create_rse(proj_info, type = "gene", annotation = x)
})
#> 2025-05-15 15:30:16.868747 downloading and reading the metadata.
#> 2025-05-15 15:30:17.345268 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:17.81811 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:18.317358 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:18.837119 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:19.28682 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:19.346519 downloading and reading the feature information.
#> 2025-05-15 15:30:19.737709 caching file human.gene_sums.G026.gtf.gz.
#> 2025-05-15 15:30:20.077452 downloading and reading the counts: 12 samples across 63856 features.
#> 2025-05-15 15:30:20.453467 caching file sra.gene_sums.SRP009615.G026.gz.
#> 2025-05-15 15:30:20.58795 constructing the RangedSummarizedExperiment (rse) object.
#> 2025-05-15 15:30:20.608069 downloading and reading the metadata.
#> 2025-05-15 15:30:21.092493 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:21.684575 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:22.197029 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:22.758612 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:23.214198 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:23.285857 downloading and reading the feature information.
#> 2025-05-15 15:30:23.651547 caching file human.gene_sums.G029.gtf.gz.
#> 2025-05-15 15:30:24.068461 downloading and reading the counts: 12 samples across 64837 features.
#> 2025-05-15 15:30:24.502613 caching file sra.gene_sums.SRP009615.G029.gz.
#> 2025-05-15 15:30:24.63719 constructing the RangedSummarizedExperiment (rse) object.
#> 2025-05-15 15:30:24.657545 downloading and reading the metadata.
#> 2025-05-15 15:30:25.100451 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:25.587946 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:26.059018 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:26.514706 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:26.968381 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:27.94902 downloading and reading the feature information.
#> 2025-05-15 15:30:28.394745 caching file human.gene_sums.F006.gtf.gz.
#> 2025-05-15 15:30:28.704276 downloading and reading the counts: 12 samples across 124047 features.
#> 2025-05-15 15:30:29.12554 caching file sra.gene_sums.SRP009615.F006.gz.
#> 2025-05-15 15:30:29.295429 constructing the RangedSummarizedExperiment (rse) object.
#> 2025-05-15 15:30:29.316711 downloading and reading the metadata.
#> 2025-05-15 15:30:29.759143 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:30.285507 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:30.879139 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:31.344373 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:31.792055 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:31.861686 downloading and reading the feature information.
#> 2025-05-15 15:30:32.332628 caching file human.gene_sums.R109.gtf.gz.
#> 2025-05-15 15:30:32.751497 downloading and reading the counts: 12 samples across 54042 features.
#> 2025-05-15 15:30:33.19961 caching file sra.gene_sums.SRP009615.R109.gz.
#> 2025-05-15 15:30:33.39428 constructing the RangedSummarizedExperiment (rse) object.
#> 2025-05-15 15:30:33.41397 downloading and reading the metadata.
#> 2025-05-15 15:30:33.846288 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:34.333627 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:34.924231 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:35.377321 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:35.845562 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:35.909279 downloading and reading the feature information.
#> 2025-05-15 15:30:36.287632 caching file human.gene_sums.ERCC.gtf.gz.
#> 2025-05-15 15:30:36.353005 downloading and reading the counts: 12 samples across 92 features.
#> 2025-05-15 15:30:36.735041 caching file sra.gene_sums.SRP009615.ERCC.gz.
#> 2025-05-15 15:30:36.825157 constructing the RangedSummarizedExperiment (rse) object.
#> 2025-05-15 15:30:36.844433 downloading and reading the metadata.
#> 2025-05-15 15:30:37.309666 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:37.832231 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:38.448784 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:38.918429 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:39.451695 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:39.526625 downloading and reading the feature information.
#> 2025-05-15 15:30:40.012407 caching file human.gene_sums.SIRV.gtf.gz.
#> 2025-05-15 15:30:40.087957 downloading and reading the counts: 12 samples across 7 features.
#> 2025-05-15 15:30:40.568277 caching file sra.gene_sums.SRP009615.SIRV.gz.
#> 2025-05-15 15:30:40.66928 constructing the RangedSummarizedExperiment (rse) object.
names(rses_gene) <- annotation_options()
rses_gene
#> $gencode_v26
#> class: RangedSummarizedExperiment 
#> dim: 63856 12 
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(63856): ENSG00000278704.1 ENSG00000277400.1 ...
#>   ENSG00000182484.15_PAR_Y ENSG00000227159.8_PAR_Y
#> rowData names(10): source type ... havana_gene tag
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#>   recount_pred.curated.cell_line BigWigURL
#> 
#> $gencode_v29
#> class: RangedSummarizedExperiment 
#> dim: 64837 12 
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(64837): ENSG00000278704.1 ENSG00000277400.1 ...
#>   ENSG00000182484.15_PAR_Y ENSG00000227159.8_PAR_Y
#> rowData names(10): source type ... havana_gene tag
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#>   recount_pred.curated.cell_line BigWigURL
#> 
#> $fantom6_cat
#> class: RangedSummarizedExperiment 
#> dim: 124047 12 
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(124047): CATG00000042730 CATG00000042731 ... CATG00000114977
#>   CATG00000115127
#> rowData names(5): source type bp_length phase gene_id
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#>   recount_pred.curated.cell_line BigWigURL
#> 
#> $refseq
#> class: RangedSummarizedExperiment 
#> dim: 54042 12 
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(54042): gene14440 gene14441 ... gene54248 gene54249
#> rowData names(25): source type ... standard_name codons
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#>   recount_pred.curated.cell_line BigWigURL
#> 
#> $ercc
#> class: RangedSummarizedExperiment 
#> dim: 92 12 
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(92): ERCC-00002 ERCC-00003 ... ERCC-00170 ERCC-00171
#> rowData names(6): source type ... gene_id transcript_id
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#>   recount_pred.curated.cell_line BigWigURL
#> 
#> $sirv
#> class: RangedSummarizedExperiment 
#> dim: 7 12 
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(7): SIRV1 SIRV2 ... SIRV6 SIRV7
#> rowData names(5): source type bp_length phase gene_id
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#>   recount_pred.curated.cell_line BigWigURL
#> 

## Create an RSE object at the exon level
rse_exon_SRP009615 <- create_rse(
    proj_info,
    type = "exon"
)
#> 2025-05-15 15:30:40.738579 downloading and reading the metadata.
#> 2025-05-15 15:30:41.16794 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:30:41.706607 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:30:42.478145 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:42.945529 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:30:43.471294 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:30:43.554595 downloading and reading the feature information.
#> 2025-05-15 15:30:43.941675 caching file human.exon_sums.G026.gtf.gz.
#> 2025-05-15 15:31:01.098442 downloading and reading the counts: 12 samples across 1299686 features.
#> 2025-05-15 15:31:01.679623 caching file sra.exon_sums.SRP009615.G026.gz.
#> 2025-05-15 15:31:02.649283 constructing the RangedSummarizedExperiment (rse) object.

## Explore the resulting RSE exon object
rse_exon_SRP009615
#> class: RangedSummarizedExperiment 
#> dim: 1299686 12 
#> metadata(8): time_created recount3_version ... annotation recount3_url
#> assays(1): raw_counts
#> rownames(1299686): GL000009.2|56140|58376|- GL000194.1|53594|54832|-
#>   ... chrY|57213880|57213964|- chrY|57214350|57214397|-
#> rowData names(21): source type ... ont ccdsid
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#>   recount_pred.curated.cell_line BigWigURL

dim(rse_exon_SRP009615)
#> [1] 1299686      12
rowRanges(rse_exon_SRP009615)
#> GRanges object with 1299686 ranges and 21 metadata columns:
#>                                seqnames            ranges strand |   source
#>                                   <Rle>         <IRanges>  <Rle> | <factor>
#>     GL000009.2|56140|58376|- GL000009.2       56140-58376      - |  ENSEMBL
#>     GL000194.1|53594|54832|- GL000194.1       53594-54832      - |  ENSEMBL
#>     GL000194.1|55446|55676|- GL000194.1       55446-55676      - |  ENSEMBL
#>     GL000194.1|53590|55676|- GL000194.1       53590-55676      - |  ENSEMBL
#>   GL000194.1|112792|112850|- GL000194.1     112792-112850      - |  ENSEMBL
#>                          ...        ...               ...    ... .      ...
#>     chrY|57212184|57213125|-       chrY 57212184-57213125      - |   HAVANA
#>     chrY|57213204|57213357|-       chrY 57213204-57213357      - |   HAVANA
#>     chrY|57213526|57213602|-       chrY 57213526-57213602      - |   HAVANA
#>     chrY|57213880|57213964|-       chrY 57213880-57213964      - |   HAVANA
#>     chrY|57214350|57214397|-       chrY 57214350-57214397      - |   HAVANA
#>                                  type bp_length     phase
#>                              <factor> <numeric> <integer>
#>     GL000009.2|56140|58376|-     exon      2237      <NA>
#>     GL000194.1|53594|54832|-     exon      1239      <NA>
#>     GL000194.1|55446|55676|-     exon       231      <NA>
#>     GL000194.1|53590|55676|-     exon      2087      <NA>
#>   GL000194.1|112792|112850|-     exon        59      <NA>
#>                          ...      ...       ...       ...
#>     chrY|57212184|57213125|-     exon       942      <NA>
#>     chrY|57213204|57213357|-     exon       154      <NA>
#>     chrY|57213526|57213602|-     exon        77      <NA>
#>     chrY|57213880|57213964|-     exon        85      <NA>
#>     chrY|57214350|57214397|-     exon        48      <NA>
#>                                             gene_id          transcript_id
#>                                         <character>            <character>
#>     GL000009.2|56140|58376|-      ENSG00000278704.1      ENST00000618686.1
#>     GL000194.1|53594|54832|-      ENSG00000274847.1      ENST00000400754.4
#>     GL000194.1|55446|55676|-      ENSG00000274847.1      ENST00000400754.4
#>     GL000194.1|53590|55676|-      ENSG00000277400.1      ENST00000613230.1
#>   GL000194.1|112792|112850|-      ENSG00000274847.1      ENST00000400754.4
#>                          ...                    ...                    ...
#>     chrY|57212184|57213125|- ENSG00000227159.8_PA.. ENST00000507418.6_PA..
#>     chrY|57213204|57213357|- ENSG00000227159.8_PA.. ENST00000507418.6_PA..
#>     chrY|57213526|57213602|- ENSG00000227159.8_PA.. ENST00000507418.6_PA..
#>     chrY|57213880|57213964|- ENSG00000227159.8_PA.. ENST00000507418.6_PA..
#>     chrY|57214350|57214397|- ENSG00000227159.8_PA.. ENST00000507418.6_PA..
#>                                           gene_type   gene_name
#>                                         <character> <character>
#>     GL000009.2|56140|58376|-         protein_coding  BX004987.1
#>     GL000194.1|53594|54832|-         protein_coding  AC145212.1
#>     GL000194.1|55446|55676|-         protein_coding  AC145212.1
#>     GL000194.1|53590|55676|-         protein_coding  AC145212.2
#>   GL000194.1|112792|112850|-         protein_coding  AC145212.1
#>                          ...                    ...         ...
#>     chrY|57212184|57213125|- unprocessed_pseudogene    DDX11L16
#>     chrY|57213204|57213357|- unprocessed_pseudogene    DDX11L16
#>     chrY|57213526|57213602|- unprocessed_pseudogene    DDX11L16
#>     chrY|57213880|57213964|- unprocessed_pseudogene    DDX11L16
#>     chrY|57214350|57214397|- unprocessed_pseudogene    DDX11L16
#>                                     transcript_type transcript_name exon_number
#>                                         <character>     <character> <character>
#>     GL000009.2|56140|58376|-         protein_coding  BX004987.1-201           1
#>     GL000194.1|53594|54832|-         protein_coding  AC145212.1-201           4
#>     GL000194.1|55446|55676|-         protein_coding  AC145212.1-201           3
#>     GL000194.1|53590|55676|-         protein_coding  AC145212.2-201           3
#>   GL000194.1|112792|112850|-         protein_coding  AC145212.1-201           2
#>                          ...                    ...             ...         ...
#>     chrY|57212184|57213125|- unprocessed_pseudogene    DDX11L16-001           5
#>     chrY|57213204|57213357|- unprocessed_pseudogene    DDX11L16-001           4
#>     chrY|57213526|57213602|- unprocessed_pseudogene    DDX11L16-001           3
#>     chrY|57213880|57213964|- unprocessed_pseudogene    DDX11L16-001           2
#>     chrY|57214350|57214397|- unprocessed_pseudogene    DDX11L16-001           1
#>                                        exon_id       level        protein_id
#>                                    <character> <character>       <character>
#>     GL000009.2|56140|58376|- ENSE00003753029.1           3 ENSP00000484918.1
#>     GL000194.1|53594|54832|- ENSE00002218789.2           3 ENSP00000478910.1
#>     GL000194.1|55446|55676|- ENSE00003714436.1           3 ENSP00000478910.1
#>     GL000194.1|53590|55676|- ENSE00003723764.1           3 ENSP00000483280.1
#>   GL000194.1|112792|112850|- ENSE00003713687.1           3 ENSP00000478910.1
#>                          ...               ...         ...               ...
#>     chrY|57212184|57213125|- ENSE00002023900.1           2              <NA>
#>     chrY|57213204|57213357|- ENSE00002036959.1           2              <NA>
#>     chrY|57213526|57213602|- ENSE00002021169.1           2              <NA>
#>     chrY|57213880|57213964|- ENSE00002046926.1           2              <NA>
#>     chrY|57214350|57214397|- ENSE00002072208.1           2              <NA>
#>                              transcript_support_level         tag
#>                                           <character> <character>
#>     GL000009.2|56140|58376|-                       NA       basic
#>     GL000194.1|53594|54832|-                        1       basic
#>     GL000194.1|55446|55676|-                        1       basic
#>     GL000194.1|53590|55676|-                        1       basic
#>   GL000194.1|112792|112850|-                        1       basic
#>                          ...                      ...         ...
#>     chrY|57212184|57213125|-                       NA         PAR
#>     chrY|57213204|57213357|-                       NA         PAR
#>     chrY|57213526|57213602|-                       NA         PAR
#>     chrY|57213880|57213964|-                       NA         PAR
#>     chrY|57214350|57214397|-                       NA         PAR
#>                                     recount_exon_id          havana_gene
#>                                         <character>          <character>
#>     GL000009.2|56140|58376|- GL000009.2|56140|583..                 <NA>
#>     GL000194.1|53594|54832|- GL000194.1|53594|548..                 <NA>
#>     GL000194.1|55446|55676|- GL000194.1|55446|556..                 <NA>
#>     GL000194.1|53590|55676|- GL000194.1|53590|556..                 <NA>
#>   GL000194.1|112792|112850|- GL000194.1|112792|11..                 <NA>
#>                          ...                    ...                  ...
#>     chrY|57212184|57213125|- chrY|57212184|572131.. OTTHUMG00000022678.1
#>     chrY|57213204|57213357|- chrY|57213204|572133.. OTTHUMG00000022678.1
#>     chrY|57213526|57213602|- chrY|57213526|572136.. OTTHUMG00000022678.1
#>     chrY|57213880|57213964|- chrY|57213880|572139.. OTTHUMG00000022678.1
#>     chrY|57214350|57214397|- chrY|57214350|572143.. OTTHUMG00000022678.1
#>                                 havana_transcript         ont      ccdsid
#>                                       <character> <character> <character>
#>     GL000009.2|56140|58376|-                 <NA>        <NA>        <NA>
#>     GL000194.1|53594|54832|-                 <NA>        <NA>        <NA>
#>     GL000194.1|55446|55676|-                 <NA>        <NA>        <NA>
#>     GL000194.1|53590|55676|-                 <NA>        <NA>        <NA>
#>   GL000194.1|112792|112850|-                 <NA>        <NA>        <NA>
#>                          ...                  ...         ...         ...
#>     chrY|57212184|57213125|- OTTHUMT00000058841.1 PGO:0000005        <NA>
#>     chrY|57213204|57213357|- OTTHUMT00000058841.1 PGO:0000005        <NA>
#>     chrY|57213526|57213602|- OTTHUMT00000058841.1 PGO:0000005        <NA>
#>     chrY|57213880|57213964|- OTTHUMT00000058841.1 PGO:0000005        <NA>
#>     chrY|57214350|57214397|- OTTHUMT00000058841.1 PGO:0000005        <NA>
#>   -------
#>   seqinfo: 374 sequences from an unspecified genome; no seqlengths
pryr::object_size(rse_exon_SRP009615)
#> 528.18 MB

## Create an RSE object at the exon-exon junction level
rse_jxn_SRP009615 <- create_rse(
    proj_info,
    type = "jxn"
)
#> 2025-05-15 15:31:05.955472 downloading and reading the metadata.
#> 2025-05-15 15:31:06.389315 caching file sra.sra.SRP009615.MD.gz.
#> 2025-05-15 15:31:06.906729 caching file sra.recount_project.SRP009615.MD.gz.
#> 2025-05-15 15:31:07.491589 caching file sra.recount_qc.SRP009615.MD.gz.
#> 2025-05-15 15:31:08.061836 caching file sra.recount_seq_qc.SRP009615.MD.gz.
#> 2025-05-15 15:31:08.546558 caching file sra.recount_pred.SRP009615.MD.gz.
#> 2025-05-15 15:31:08.613044 downloading and reading the feature information.
#> 2025-05-15 15:31:09.040326 caching file sra.junctions.SRP009615.ALL.RR.gz.
#> 2025-05-15 15:31:10.361883 downloading and reading the counts: 12 samples across 281448 features.
#> 2025-05-15 15:31:10.837945 caching file sra.junctions.SRP009615.ALL.MM.gz.
#> 2025-05-15 15:31:11.444887 matching exon-exon junction counts with the metadata.
#> 2025-05-15 15:31:11.935694 caching file sra.junctions.SRP009615.ALL.ID.gz.
#> 2025-05-15 15:31:12.010906 constructing the RangedSummarizedExperiment (rse) object.

## Explore the resulting RSE exon-exon junction object
rse_jxn_SRP009615
#> class: RangedSummarizedExperiment 
#> dim: 281448 12 
#> metadata(9): time_created recount3_version ... jxn_format recount3_url
#> assays(1): counts
#> rownames(281448): chr1:11845-12009:+ chr1:12698-13220:+ ...
#>   chrY:56848810-56851543:- chrY:56850515-56850921:+
#> rowData names(6): length annotated ... left_annotated right_annotated
#> colnames(12): SRR387777 SRR387778 ... SRR389077 SRR389078
#> colData names(175): rail_id external_id ...
#>   recount_pred.curated.cell_line BigWigURL

dim(rse_jxn_SRP009615)
#> [1] 281448     12
rowRanges(rse_jxn_SRP009615)
#> GRanges object with 281448 ranges and 6 metadata columns:
#>                            seqnames            ranges strand |    length
#>                               <Rle>         <IRanges>  <Rle> | <integer>
#>         chr1:11845-12009:+     chr1       11845-12009      + |       165
#>         chr1:12698-13220:+     chr1       12698-13220      + |       523
#>        chr1:14696-185174:-     chr1      14696-185174      - |    170479
#>         chr1:14830-14969:-     chr1       14830-14969      - |       140
#>         chr1:14830-15020:-     chr1       14830-15020      - |       191
#>                        ...      ...               ...    ... .       ...
#>   chrY:56846131-56846553:+     chrY 56846131-56846553      + |       423
#>   chrY:56846268-56846553:+     chrY 56846268-56846553      + |       286
#>   chrY:56846486-56846553:+     chrY 56846486-56846553      + |        68
#>   chrY:56848810-56851543:-     chrY 56848810-56851543      - |      2734
#>   chrY:56850515-56850921:+     chrY 56850515-56850921      + |       407
#>                            annotated  left_motif right_motif
#>                            <integer> <character> <character>
#>         chr1:11845-12009:+         0          GT          AG
#>         chr1:12698-13220:+         1          GT          AG
#>        chr1:14696-185174:-         0          CT          AC
#>         chr1:14830-14969:-         1          CT          AC
#>         chr1:14830-15020:-         0          CT          AC
#>                        ...       ...         ...         ...
#>   chrY:56846131-56846553:+         0          GT          AG
#>   chrY:56846268-56846553:+         0          GT          AG
#>   chrY:56846486-56846553:+         0          GT          AG
#>   chrY:56848810-56851543:-         0          CT          AC
#>   chrY:56850515-56850921:+         0          GT          AG
#>                                    left_annotated        right_annotated
#>                                       <character>            <character>
#>         chr1:11845-12009:+                      0              aC19,sG19
#>         chr1:12698-13220:+ aC19,gC19,gC24,gC25,.. aC19,cH38,gC19,gC24,..
#>        chr1:14696-185174:-                      0                      0
#>         chr1:14830-14969:- aC19,cH38,gC19,kG19,.. aC19,cH38,gC19,kG19,..
#>         chr1:14830-15020:- aC19,cH38,gC19,kG19,..                      0
#>                        ...                    ...                    ...
#>   chrY:56846131-56846553:+                      0                      0
#>   chrY:56846268-56846553:+                      0                      0
#>   chrY:56846486-56846553:+                      0                      0
#>   chrY:56848810-56851543:-                      0                      0
#>   chrY:56850515-56850921:+                      0                      0
#>   -------
#>   seqinfo: 97 sequences from an unspecified genome; no seqlengths
pryr::object_size(rse_jxn_SRP009615)
#> 60.30 MB

## Obtain a list of RSE objects for all exon annotations
if (FALSE) { # \dontrun{
## Build one exon-level RSE per entry of annotation_options() and label each
## list element with the annotation name that was passed to create_rse().
rses_exon <- setNames(
    lapply(annotation_options(), function(annotation_name) {
        create_rse(
            proj_info,
            type = "exon",
            annotation = annotation_name,
            verbose = FALSE
        )
    }),
    annotation_options()
)
} # }