This function uses AnnotationHub to obtain the objects provided by this package. These are: the TxDb object made with gencode_txdb(), the annotated genes object made with gencode_annotated_genes(), and the GenomicState object made with gencode_genomic_state().

GenomicStateHub(
  version = "31",
  genome = c("hg38", "hg19"),
  filetype = c("TxDb", "AnnotatedGenes", "GenomicState"),
  ah = AnnotationHub::AnnotationHub()
)

Arguments

version

A character(1) with the Gencode version number.

genome

A character(1) with the human genome version number. Valid options are 'hg38' or 'hg19'.

filetype

A character(1) with the type of file to retrieve. Valid options are 'TxDb', 'AnnotatedGenes' or 'GenomicState'.

ah

An AnnotationHub object (see AnnotationHub-class).

Value

The AnnotationHub-class query for the file you requested.

Author

Leonardo Collado-Torres

Examples


## Query AnnotationHub for the GenomicState object for Gencode v31 on
## hg19 coordinates
hub_query_gs_gencode_v31_hg19 <- GenomicStateHub(
    version = "31",
    genome = "hg19",
    filetype = "GenomicState"
)
hub_query_gs_gencode_v31_hg19
#> AnnotationHub with 1 record
#> # snapshotDate(): 2024-10-28
#> # names(): AH75184
#> # $dataprovider: GENCODE
#> # $species: Homo sapiens
#> # $rdataclass: list
#> # $rdatadateadded: 2019-10-22
#> # $title: GenomicState for Gencode v31 on hg19 coordinates
#> # $description: Gencode v31 GenomicState from derfinder::makeGenomicState() ...
#> # $taxonomyid: 9606
#> # $genome: GRCh37
#> # $sourcetype: GTF
#> # $sourceurl: ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/releas...
#> # $sourcesize: NA
#> # $tags: c("Gencode", "GenomicState", "hg19", "v31") 
#> # retrieve record with 'object[["AH75184"]]' 


## Check the metadata
mcols(hub_query_gs_gencode_v31_hg19)
#> DataFrame with 1 row and 15 columns
#>                          title dataprovider      species taxonomyid      genome
#>                    <character>  <character>  <character>  <integer> <character>
#> AH75184 GenomicState for Gen..      GENCODE Homo sapiens       9606      GRCh37
#>                    description coordinate_1_based             maintainer
#>                    <character>          <integer>            <character>
#> AH75184 Gencode v31 GenomicS..                  1 Leonardo Collado-Tor..
#>         rdatadateadded preparerclass                          tags  rdataclass
#>            <character>   <character>                        <AsIs> <character>
#> AH75184     2019-10-22  GenomicState Gencode,GenomicState,hg19,...        list
#>                      rdatapath              sourceurl  sourcetype
#>                    <character>            <character> <character>
#> AH75184 GenomicState/gencode.. ftp://ftp.ebi.ac.uk/..         GTF

## Access the file through AnnotationHub
if (length(hub_query_gs_gencode_v31_hg19) == 1) {
    hub_gs_gencode_v31_hg19 <- hub_query_gs_gencode_v31_hg19[[1]]

    hub_gs_gencode_v31_hg19
}
#> loading from cache
#> $fullGenome
#> GRanges object with 659263 ranges and 5 metadata columns:
#>          seqnames            ranges strand |   theRegion         tx_id
#>             <Rle>         <IRanges>  <Rle> | <character> <IntegerList>
#>        1     chr1       11869-12227      + |        exon           1,2
#>        2     chr1       12228-12612      + |      intron           1,2
#>        3     chr1       12613-12721      + |        exon           1,2
#>        4     chr1       12722-12974      + |      intron           1,2
#>        5     chr1       12975-13052      + |        exon             2
#>      ...      ...               ...    ... .         ...           ...
#>   659259     chrY 59208555-59214013      * |  intergenic              
#>   659260     chrY 59276440-59311662      * |  intergenic              
#>   659261     chrY 59311997-59318040      * |  intergenic              
#>   659262     chrY 59318921-59330251      * |  intergenic              
#>   659263     chrY 59360549-59373566      * |  intergenic              
#>                                          tx_name          gene          symbol
#>                                  <CharacterList> <IntegerList> <CharacterList>
#>        1 ENST00000450305.2_1,ENST00000456328.2_1         26085         DDX11L1
#>        2 ENST00000450305.2_1,ENST00000456328.2_1         26085         DDX11L1
#>        3 ENST00000450305.2_1,ENST00000456328.2_1         26085         DDX11L1
#>        4 ENST00000450305.2_1,ENST00000456328.2_1         26085         DDX11L1
#>        5                     ENST00000450305.2_1         26085         DDX11L1
#>      ...                                     ...           ...             ...
#>   659259                                                                      
#>   659260                                                                      
#>   659261                                                                      
#>   659262                                                                      
#>   659263                                                                      
#>   -------
#>   seqinfo: 24 sequences from hg19 genome
#> 
#> $codingGenome
#> GRanges object with 878954 ranges and 5 metadata columns:
#>          seqnames            ranges strand |   theRegion         tx_id
#>             <Rle>         <IRanges>  <Rle> | <character> <IntegerList>
#>        1     chr1        9869-11868      + |    promoter           1,2
#>        2     chr1       11869-12227      + |        exon           1,2
#>        3     chr1       12228-12612      + |      intron           1,2
#>        4     chr1       12613-12721      + |        exon           1,2
#>        5     chr1       12722-12974      + |      intron           1,2
#>      ...      ...               ...    ... .         ...           ...
#>   878950     chrY 59208555-59212013      * |  intergenic              
#>   878951     chrY 59276440-59311662      * |  intergenic              
#>   878952     chrY 59313997-59318040      * |  intergenic              
#>   878953     chrY 59320921-59328251      * |  intergenic              
#>   878954     chrY 59362549-59373566      * |  intergenic              
#>                                          tx_name          gene          symbol
#>                                  <CharacterList> <IntegerList> <CharacterList>
#>        1 ENST00000450305.2_1,ENST00000456328.2_1         26085         DDX11L1
#>        2 ENST00000450305.2_1,ENST00000456328.2_1         26085         DDX11L1
#>        3 ENST00000450305.2_1,ENST00000456328.2_1         26085         DDX11L1
#>        4 ENST00000450305.2_1,ENST00000456328.2_1         26085         DDX11L1
#>        5 ENST00000450305.2_1,ENST00000456328.2_1         26085         DDX11L1
#>      ...                                     ...           ...             ...
#>   878950                                                                      
#>   878951                                                                      
#>   878952                                                                      
#>   878953                                                                      
#>   878954                                                                      
#>   -------
#>   seqinfo: 24 sequences from hg19 genome
#>