This function merges the sample metadata from a RangedSummarizedExperiment object for some of the variables that we (at Jaffelab) typically create with our RNA-seq processing pipeline for samples that are sequenced in more than one lane.

merge_rse_metrics(rse)

Arguments

rse

A RangedSummarizedExperiment-class object with 'concordMapRate', 'overallMapRate', 'mitoRate', 'rRNA_rate', 'totalAssignedGene', 'numMapped', 'numReads', 'numUnmapped', 'mitoMapped', 'totalMapped', 'ERCCsumLogErr' as either NumericList() or IntegerList() objects in the colData() columns.

Value

A RangedSummarizedExperiment-class

with merged columns for 'concordMapRate', 'overallMapRate', 'mitoRate', 'rRNA_rate', 'totalAssignedGene', 'numMapped', 'numReads', 'numUnmapped', 'mitoMapped', 'totalMapped', 'ERCCsumLogErr'

Author

Leonardo Collado-Torres, Andrew E Jaffe

Examples


## Taken from ?SummarizedExperiment
library("SummarizedExperiment")
#> Loading required package: MatrixGenerics
#> Loading required package: matrixStats
#> 
#> Attaching package: ‘MatrixGenerics’
#> The following objects are masked from ‘package:matrixStats’:
#> 
#>     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
#>     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
#>     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
#>     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
#>     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
#>     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
#>     colWeightedMeans, colWeightedMedians, colWeightedSds,
#>     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
#>     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
#>     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
#>     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
#>     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
#>     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
#>     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
#>     rowWeightedSds, rowWeightedVars
#> Loading required package: GenomicRanges
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#> 
#> Attaching package: ‘BiocGenerics’
#> The following object is masked from ‘package:limma’:
#> 
#>     plotMA
#> The following objects are masked from ‘package:stats’:
#> 
#>     IQR, mad, sd, var, xtabs
#> The following objects are masked from ‘package:base’:
#> 
#>     Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
#>     as.data.frame, basename, cbind, colnames, dirname, do.call,
#>     duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
#>     lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
#>     pmin.int, rank, rbind, rownames, sapply, setdiff, sort, table,
#>     tapply, union, unique, unsplit, which.max, which.min
#> Loading required package: S4Vectors
#> 
#> Attaching package: ‘S4Vectors’
#> The following object is masked from ‘package:utils’:
#> 
#>     findMatches
#> The following objects are masked from ‘package:base’:
#> 
#>     I, expand.grid, unname
#> Loading required package: IRanges
#> Loading required package: GenomeInfoDb
#> Loading required package: Biobase
#> Welcome to Bioconductor
#> 
#>     Vignettes contain introductory material; view with
#>     'browseVignettes()'. To cite Bioconductor, see
#>     'citation("Biobase")', and for packages 'citation("pkgname")'.
#> 
#> Attaching package: ‘Biobase’
#> The following object is masked from ‘package:MatrixGenerics’:
#> 
#>     rowMedians
#> The following objects are masked from ‘package:matrixStats’:
#> 
#>     anyMissing, rowMedians
nrows <- 200
ncols <- 6
set.seed(20181116)
counts <- matrix(runif(nrows * ncols, 1, 1e4), nrows)
rowRanges <- GRanges(rep(c("chr1", "chr2"), c(50, 150)),
    IRanges(floor(runif(200, 1e5, 1e6)), width = 100),
    strand = sample(c("+", "-"), 200, TRUE),
    feature_id = sprintf("ID%03d", seq_len(200))
)

## Function for generating some data
rand_gen <- function(mu = 1e6, sd = 1e3, digits = 0) {
    IRanges::NumericList(lapply(seq_len(6), function(x) {
        round(rnorm(2, mu, sd), digits)
    }))
}
colData <- DataFrame(
    Treatment = rep(c("ChIP", "Input"), 3),
    row.names = LETTERS[seq_len(6)],
    concordMapRate = rand_gen(0.5, 0.05, 10),
    overallMapRate = rand_gen(0.7, 0.05, 10),
    mitoRate = rand_gen(0.2, 0.05, 10),
    rRNA_rate = rand_gen(0.1, 0.01, 10),
    totalAssignedGene = rand_gen(0.5, 0.05, 10),
    numMapped = rand_gen(),
    numReads = rand_gen(1e6 + 6e5),
    numUnmapped = rand_gen(5e5),
    mitoMapped = rand_gen(1e5),
    totalMapped = rand_gen(1e6 + 1e5),
    ERCCsumLogErr = rand_gen(-28.5, 4, 6)
)
rse <- SummarizedExperiment(
    assays = SimpleList(counts = counts),
    rowRanges = rowRanges, colData = colData
)
colData(rse)
#> DataFrame with 6 rows and 12 columns
#>     Treatment    concordMapRate    overallMapRate          mitoRate
#>   <character>     <NumericList>     <NumericList>     <NumericList>
#> A        ChIP 0.541645,0.581919 0.638841,0.744259 0.196154,0.176830
#> B       Input 0.636704,0.417308 0.720357,0.808207 0.206167,0.307936
#> C        ChIP 0.480932,0.563030 0.766605,0.731026 0.263311,0.191516
#> D       Input 0.514338,0.404294 0.680493,0.706884 0.199656,0.201472
#> E        ChIP 0.575868,0.580396 0.675552,0.721660 0.283546,0.200031
#> F       Input 0.487973,0.549464 0.758194,0.680816 0.254013,0.122518
#>             rRNA_rate totalAssignedGene       numMapped        numReads
#>         <NumericList>     <NumericList>   <NumericList>   <NumericList>
#> A 0.0982325,0.1004535 0.534906,0.495279  999489,1001045 1600514,1599856
#> B   0.108117,0.105019 0.512784,0.585948   999904,999645 1600861,1600902
#> C 0.0935756,0.1128849 0.500233,0.543947 1000994, 999994 1599045,1599113
#> D 0.0983786,0.0961797 0.454453,0.394858 1000080,1001643 1600388,1601137
#> E 0.0786774,0.0902039 0.511399,0.444904   999405,998558 1598587,1600106
#> F   0.116688,0.101786 0.512436,0.581784 1000598, 999127 1598227,1598982
#>     numUnmapped    mitoMapped     totalMapped     ERCCsumLogErr
#>   <NumericList> <NumericList>   <NumericList>     <NumericList>
#> A 498799,500663 100075, 99746 1100621,1098107 -30.5656,-40.7510
#> B 499160,501936 100454,100220 1099005,1100717 -26.1549,-25.9601
#> C 499887,499965 100077,100616 1099423,1101197 -29.4516,-27.9824
#> D 499315,499551   98586,97819 1098706,1098371 -22.6042,-28.8658
#> E 499205,500651  98690,100203 1099373,1101728 -22.6664,-31.5804
#> F 500096,501025   99278,98614 1101246,1098881 -28.1533,-28.5806
rse_merged <- merge_rse_metrics(rse)
#> 2024-03-27 13:28:53.766334 attempting to approximate ERCCsumLogErr.
colData(rse_merged)
#> DataFrame with 6 rows and 12 columns
#>     Treatment concordMapRate overallMapRate  mitoRate rRNA_rate
#>   <character>      <numeric>      <numeric> <numeric> <numeric>
#> A        ChIP       0.561778       0.691539  0.186484 0.0993439
#> B       Input       0.527004       0.764283  0.257045 0.1065683
#> C        ChIP       0.521982       0.748815  0.227432 0.1032254
#> D       Input       0.459303       0.693692  0.200565 0.0972783
#> E        ChIP       0.578133       0.698617  0.241806 0.0844382
#> F       Input       0.518726       0.719496  0.188314 0.1092428
#>   totalAssignedGene numMapped  numReads numUnmapped mitoMapped totalMapped
#>           <numeric> <numeric> <numeric>   <numeric>  <numeric>   <numeric>
#> A          0.515077   2000534   3200370      999462     199821     2198728
#> B          0.549361   1999549   3201763     1001096     200674     2199722
#> C          0.522079   2000988   3198158      999852     200693     2200620
#> D          0.424632   2001723   3201525      998866     196405     2197077
#> E          0.478166   1997963   3198693      999856     198893     2201101
#> F          0.547085   1999725   3197209     1001121     197892     2200127
#>   ERCCsumLogErr
#>       <numeric>
#> A      -35.5606
#> B      -26.0575
#> C      -28.7150
#> D      -25.6981
#> E      -27.0485
#> F      -28.3668

## Use the BSP2 real data
local_bsp2 <- tempfile("BSP2_rse_gene.Rdata")
download.file(
    "https://s3.us-east-2.amazonaws.com/libd-brainseq2/rse_gene_unfiltered.Rdata",
    destfile = local_bsp2,
    mode = "wb"
)
load(local_bsp2, verbose = TRUE)
#> Loading objects:
#>   rse_gene
# lobstr::obj_size(rse_gene)
# 872.12 MB
bsp2_rse_merged <- merge_rse_metrics(rse_gene)
#> 2024-03-27 13:29:04.807562 attempting to approximate ERCCsumLogErr.
colData(bsp2_rse_merged)
#> DataFrame with 900 rows and 54 columns
#>                                            SAMPLE_ID   FQCbasicStats
#>                                      <CharacterList> <CharacterList>
#> R10424                        R10424_C4KC9ACXX_TAA..            PASS
#> R12195                        R12195_C4E9TACXX_CGG..            PASS
#> R12198                        R12198_C4KC9ACXX_TCC..            PASS
#> R12199                        R12199_C4KC9ACXX_TCT..            PASS
#> R12200                        R12200_C4KC9ACXX_AGC..            PASS
#> ...                                              ...             ...
#> R5760                         R5760_C3ULGACXX_ATTA..            PASS
#> R5764                         R5764_C3ULGACXX_TCCG..            PASS
#> R5765                         R5765_C3KUEACXX_TCTC..            PASS
#> R5766  R5766_C41CPACXX_CTGA..,R5766_C4KY0ACXX_CTGA..            PASS
#> R5768                         R5768_C3DE5ACXX_GAAT..            PASS
#>            perBaseQual     perTileQual      perSeqQual  perBaseContent
#>        <CharacterList> <CharacterList> <CharacterList> <CharacterList>
#> R10424            PASS            PASS            PASS       FAIL/WARN
#> R12195            PASS            WARN            PASS       FAIL/WARN
#> R12198            PASS            PASS            PASS       FAIL/WARN
#> R12199            PASS            PASS            PASS       FAIL/WARN
#> R12200            PASS            PASS            PASS       FAIL/WARN
#> ...                ...             ...             ...             ...
#> R5760             PASS            PASS            PASS       FAIL/WARN
#> R5764             PASS            PASS            PASS       FAIL/WARN
#> R5765             PASS       PASS/WARN            PASS       FAIL/WARN
#> R5766        PASS,FAIL  WARN/PASS,WARN            PASS            FAIL
#> R5768             PASS            PASS            PASS       FAIL/WARN
#>              GCcontent        Ncontent   SeqLengthDist  SeqDuplication
#>        <CharacterList> <CharacterList> <CharacterList> <CharacterList>
#> R10424       WARN/FAIL            PASS            PASS            FAIL
#> R12195       WARN/FAIL            PASS            PASS            WARN
#> R12198            WARN            PASS            PASS       FAIL/WARN
#> R12199            WARN            PASS            PASS            WARN
#> R12200       WARN/FAIL            PASS            PASS       WARN/PASS
#> ...                ...             ...             ...             ...
#> R5760             WARN            PASS            PASS       FAIL/WARN
#> R5764             WARN            PASS            PASS       FAIL/WARN
#> R5765             WARN            PASS            PASS       FAIL/WARN
#> R5766             WARN            PASS            PASS            WARN
#> R5768             WARN            PASS            PASS       FAIL/WARN
#>            OverrepSeqs  AdapterContent     KmerContent  percentGC_R1
#>        <CharacterList> <CharacterList> <CharacterList> <IntegerList>
#> R10424            WARN            PASS            FAIL            43
#> R12195            WARN            PASS            FAIL            44
#> R12198            WARN            PASS            FAIL            45
#> R12199            WARN            PASS            FAIL            44
#> R12200            WARN            PASS            FAIL            45
#> ...                ...             ...             ...           ...
#> R5760             WARN            FAIL            FAIL            45
#> R5764             WARN            WARN            FAIL            45
#> R5765             WARN            PASS            FAIL            44
#> R5766             WARN            WARN            FAIL         47,46
#> R5768             WARN            PASS            FAIL            45
#>          phred100_R1      phredGT30_R1      phredGT35_R1    Adapter88_R1
#>        <NumericList>     <NumericList>     <NumericList>   <NumericList>
#> R10424            33          0.909088           0.70897         2.74234
#> R12195            34          0.906698          0.706421         1.52881
#> R12198            33          0.904668          0.695954         1.79942
#> R12199            33          0.907879          0.704134         1.48579
#> R12200            33          0.908431          0.705177          1.8948
#> ...              ...               ...               ...             ...
#> R5760             34          0.901271          0.705441         11.8898
#> R5764             34          0.904807          0.705915         6.36412
#> R5765             34          0.932119          0.769267         2.87781
#> R5766          32,29 0.889595,0.769699 0.673802,0.438134 6.99110,6.57718
#> R5768             34           0.93067          0.785657         2.23658
#>         percentGC_R2   phred100_R2      phredGT30_R2      phredGT35_R2
#>        <IntegerList> <NumericList>     <NumericList>     <NumericList>
#> R10424            43            32          0.861428          0.663019
#> R12195            44            34          0.853311           0.65908
#> R12198            45            32          0.832541          0.619686
#> R12199            44            32          0.839551          0.635018
#> R12200            45            32          0.849138          0.647683
#> ...              ...           ...               ...               ...
#> R5760             45            33          0.855376          0.692943
#> R5764             45            33          0.850278          0.682974
#> R5765             44            32            0.8122          0.617688
#> R5766          47,46         31,30 0.855985,0.739332 0.687230,0.479329
#> R5768             45            34          0.876481          0.731134
#>           Adapter88_R2 ERCCsumLogErr
#>          <NumericList>     <numeric>
#> R10424          2.7528     -110.3658
#> R12195         1.54912      -18.3144
#> R12198         1.80651      -23.1269
#> R12199         1.49456      -30.3037
#> R12200         1.90594      -24.5632
#> ...                ...           ...
#> R5760          12.2012      -623.141
#> R5764          6.53021      -623.141
#> R5765          2.72979      -623.141
#> R5766  7.13411,6.80373      -623.141
#> R5768          2.27037      -623.141
#>                                              bamFile       trimmed  numReads
#>                                      <CharacterList> <LogicalList> <integer>
#> R10424                        /dcl01/lieber/ajaffe..         FALSE 137207346
#> R12195                        /dcl01/lieber/ajaffe..         FALSE 186256540
#> R12198                        /dcl01/lieber/ajaffe..         FALSE 211587890
#> R12199                        /dcl01/lieber/ajaffe..         FALSE 127053710
#> R12200                        /dcl01/lieber/ajaffe..         FALSE  80463970
#> ...                                              ...           ...       ...
#> R5760                         /dcl01/lieber/ajaffe..          TRUE 141075744
#> R5764                         /dcl01/lieber/ajaffe..         FALSE 119501410
#> R5765                         /dcl01/lieber/ajaffe..         FALSE 172994940
#> R5766  /dcl01/lieber/ajaffe..,/dcl01/lieber/ajaffe..         FALSE 111397580
#> R5768                         /dcl01/lieber/ajaffe..         FALSE 118383286
#>        numMapped numUnmapped overallMapRate concordMapRate totalMapped
#>        <integer>   <integer>      <numeric>      <numeric>   <integer>
#> R10424 125018261    12189085         0.9112         0.8586   127414655
#> R12195 173098203    13158337         0.9294         0.8862   179828658
#> R12198 190675169    20912721         0.9012         0.8541   197766175
#> R12199 118502044     8551666         0.9327         0.8868   122760702
#> R12200  73183420     7280550         0.9095         0.8596    75668574
#> ...          ...         ...            ...            ...         ...
#> R5760  129117038    11958706         0.9152       0.914900   107353997
#> R5764  102010413    17490997         0.8536       0.767300    80847720
#> R5765  157607321    15387619         0.9111       0.847400   120937201
#> R5766   94485287    16912293         0.8482       0.760247    79219995
#> R5768  110231568     8151718         0.9311       0.855700    85972502
#>        mitoMapped  mitoRate totalAssignedGene   rRNA_rate        RNum
#>         <integer> <numeric>         <numeric>   <numeric> <character>
#> R10424    2250800 0.0173585          0.360012 5.26368e-05      R10424
#> R12195    2929274 0.0160282          0.400558 7.92850e-05      R12195
#> R12198    4838084 0.0238795          0.438474 8.75740e-05      R12198
#> R12199    1915632 0.0153648          0.438864 6.99381e-05      R12199
#> R12200     826483 0.0108044          0.362791 8.24223e-05      R12200
#> ...           ...       ...               ...         ...         ...
#> R5760    29972076  0.218255          0.419577 1.86703e-05       R5760
#> R5764    26910188  0.249728          0.454518 1.91898e-05       R5764
#> R5765    44435124  0.268697          0.479815 1.57517e-05       R5765
#> R5766    21543948  0.213824          0.497928 2.62909e-05       R5766
#> R5768    29687209  0.256677          0.555499 8.52578e-06       R5768
#>              BrNum      Region           RIN       Age         Sex        Race
#>        <character> <character> <NumericList> <numeric> <character> <character>
#> R10424      Br5168       DLPFC           6.7     64.08           M        CAUC
#> R12195      Br5073       DLPFC           8.4     62.61           M          AA
#> R12198      Br5217       DLPFC           8.5     29.98           M        CAUC
#> R12199      Br5234       DLPFC           8.8     65.26           M        CAUC
#> R12200      Br5372       DLPFC             7     32.35           M          AA
#> ...            ...         ...           ...       ...         ...         ...
#> R5760       Br1627       HIPPO           7.1    0.5147           F        CAUC
#> R5764       Br1832       HIPPO           5.7   54.8600           M          AA
#> R5765       Br1840       HIPPO           5.6   55.3400           M          AA
#> R5766       Br1884       HIPPO             8   38.4800           M          AA
#> R5768       Br1898       HIPPO           7.8   34.7600           M          AA
#>                 Dx      snpPC1       snpPC2      snpPC3       snpPC4
#>        <character>   <numeric>    <numeric>   <numeric>    <numeric>
#> R10424     Control   0.0541862   0.00141503 -0.02229830  6.71690e-06
#> R12195      Schizo  -0.0255144  -0.00401754  0.00553881 -5.78574e-04
#> R12198      Schizo   0.0545858  -0.00788385  0.00479833  4.66608e-04
#> R12199      Schizo   0.0543872  -0.00683429  0.00556194 -8.37099e-05
#> R12200      Schizo  -0.0434981  -0.00112584  0.00438767 -3.02685e-03
#> ...            ...         ...          ...         ...          ...
#> R5760      Control  0.05343660 -5.27473e-03  0.00452529 -0.000197550
#> R5764      Control -0.07612950 -7.49554e-05  0.00128218 -0.000715601
#> R5765       Schizo -0.04487910  1.43625e-04  0.00581480 -0.002767710
#> R5766      Control  0.00203288 -5.36031e-03  0.00636164  0.003517570
#> R5768      Control -0.05808860 -1.22494e-03  0.00182823  0.000191893
#>              snpPC5      snpPC6       snpPC7      snpPC8       snpPC9
#>           <numeric>   <numeric>    <numeric>   <numeric>    <numeric>
#> R10424  8.66316e-05 -0.00505508 -0.001056890  0.00511064 -0.001438760
#> R12195 -1.47552e-03  0.00816687 -0.005289970  0.00590834 -0.006252970
#> R12198  1.82373e-04  0.00346950 -0.001874620 -0.00283671 -0.000653331
#> R12199 -6.26193e-04 -0.00131079  0.000433349 -0.00301263  0.001626550
#> R12200  1.78597e-03 -0.00347009  0.000658279 -0.00062054  0.002938670
#> ...             ...         ...          ...         ...          ...
#> R5760   0.000762996 -0.00125775 -0.001663130 -0.00240119  2.14050e-03
#> R5764   0.001890170  0.00334776  0.003100850 -0.00130183 -5.45976e-05
#> R5765  -0.001926500 -0.00123727 -0.000452834 -0.00148963  2.95421e-03
#> R5766   0.002043590 -0.00792677  0.005129850  0.00190378  4.51872e-03
#> R5768  -0.001656520  0.00538843 -0.001508440 -0.00038706 -1.06840e-03
#>             snpPC10
#>           <numeric>
#> R10424  0.000946483
#> R12195  0.008017290
#> R12198 -0.002142300
#> R12199 -0.003569450
#> R12200 -0.000186203
#> ...             ...
#> R5760  -0.005488660
#> R5764  -0.000805835
#> R5765   0.004231900
#> R5766  -0.002314040
#> R5768   0.000110817

## Compare the ERCCsumLogErr approximation against the mean of the
## error values. Note that you would actually have to load the actual
## ERCC quantification output files in order to compute the correct
## ERCCsumLogErr values.
bsp2_rse_merged$mean_ERCCsumLogErr <-
    sapply(rse_gene$ERCCsumLogErr, mean)
with(
    colData(bsp2_rse_merged)[lengths(rse_gene$ERCCsumLogErr) > 1, ],
    plot(
        mean_ERCCsumLogErr,
        ERCCsumLogErr,
        ylab = "Approximate ERCCsumLogErr"
    )
)
abline(a = 0, b = 1, col = "red")

with(
    colData(bsp2_rse_merged)[lengths(rse_gene$ERCCsumLogErr) > 1, ],
    summary(mean_ERCCsumLogErr - ERCCsumLogErr)
)
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#> -46.91332  -0.47002  -0.25959  -1.02531  -0.12544  -0.00001