For some data Y organized in a matrix, calculate a F-statitic per row comparing two nested models.
getF(fit, fit0, theData)
A data.frame with the F-statistics (fstat
), the degrees of
freedom for the nallternative model (df1
), the null model
(df0
), and the p-value given the F-distribution (f_pval
).
This function can also work with outputs from lm.
set.seed(20161005)
## From limma::limFit example page:
sd <- 0.3 * sqrt(4 / rchisq(100, df = 4))
y <- matrix(rnorm(100 * 6, sd = sd), 100, 6)
rownames(y) <- paste("Gene", seq_len(100))
y[seq_len(2), seq_len(3) + 3] <- y[seq_len(2), seq_len(3) + 3] + 4
## Define the alternative and null models
pheno <- data.frame(group = rep(c(0, 1), each = 3), RIN = runif(6) + 8)
mod <- model.matrix(~ pheno$group + pheno$RIN)
mod0 <- model.matrix(~ pheno$RIN)
## Fit the models
library("limma")
fit <- lmFit(y, mod)
fit0 <- lmFit(y, mod0)
## Calculate the F statistics for these nested models
finfo <- getF(fit, fit0, y)
head(finfo)
#> fstat df1 df0 f_pval
#> Gene 1 330.0263872 1 3 0.0003638568
#> Gene 2 49.1825785 1 3 0.0059544883
#> Gene 3 0.5900420 1 3 0.4983295890
#> Gene 4 2.4241112 1 3 0.2173486908
#> Gene 5 0.2099931 1 3 0.6779292235
#> Gene 6 0.6686464 1 3 0.4734255741
## You can then use p.adjust() for multiple testing corrections
qvals <- p.adjust(finfo$f_pval, "fdr")
summary(qvals)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.03639 0.94520 0.94520 0.92277 0.95368 0.99487