R Code Examples - RNA-Seq

For each example, I start with a data frame of count data that looks like the following.

	NPL32	NPL34	NPL44	NPL46	NPL56	NPL61	PL20	PL31	PL43	PL55	PL59	PL62
comp0_c0_seq1	1073920	1228123	5254066	1371825	967934	677390	232806	1123876	2613486	630659	1231571	229342
comp1_c0_seq1	1918518	1726845	4206394	1314385	615831	939729	343003	1570057	1481815	1227947	1273835	420144
comp2_c0_seq1	171037	243930	712047	646413	469036	166717	129706	629589	1289674	564959	332828	116855
comp5_c0_seq1	147703	41802	781225	261413	299237	101581	73395	175481	433382	79492	304055	97796
comp5_c1_seq1	121391	43070	656968	231221	305144	81390	50360	152298	430000	77287	252271	62325
comp6_c0_seq1	342865	294385	638089	228506	102413	290270	121084	301103	341995	234032	224490	213252

DESeq

# Getting the data and setting up the conditional factor vector
deseq.sums.countData_l <- sums.count.data[,c(1:6, 13:18)]
deseq.sums.conds_l <- factor(c(rep("NPL", 6), rep("PL", 6)))
# Creating the DESeq data object
deseq.sums.cds_l <- newCountDataSet(countData = deseq.sums.countData_l, conditions = deseq.sums.conds_l)
# Normalizing
deseq.sums.cds_l <- estimateSizeFactors(deseq.sums.cds_l)
# Doing the variance calculation
deseq.sums.cds_l <- estimateDispersions(deseq.sums.cds_l)
# Running the test and getting the results
deseq.sums.res_l <- nbinomTest(deseq.sums.cds_l, "NPL", "PL")

DESeq2

# Extracting the columns I want
deseq2.sums.countData_l <- sums.count.data[,c(1:6, 13:18)]
# Setting up the dataframe that describes the data
deseq2.sums.colData_l <- data.frame(condition=factor(c(rep("NPL", 6), rep("PL", 6))),
 type=factor(rep("single-read", 12)))
rownames(deseq2.sums.colData_l) <- colnames(deseq2.sums.countData_l)
# Creating the DESeq2 data object
deseq2.sums.dds_l <- DESeqDataSetFromMatrix(countData = deseq2.sums.countData_l,
 colData = deseq2.sums.colData_l,
 design = ~ condition)
# Running the analysis, getting the results and sorting them
deseq2.sums.dds_l <- DESeq(deseq2.sums.dds_l)
deseq2.sums.res_l <- results(deseq2.sums.dds_l)
deseq2.sums.res_l <- deseq2.sums.res_l[order(rownames(deseq2.sums.res_l)), ]

edgeR

# Creating the edgeR data object
edger.dgel_l <- DGEList(counts=deseq2.sums.countData_l, group=factor(c(rep(1,6), rep(2,6))))
# Normalizing the data
edger.dgel_l <- calcNormFactors(edger.dgel_l)
# Some other stuff I'm not sure about
edger.dgel_l <- estimateCommonDisp(edger.dgel_l)
edger.dgel_l <- estimateTagwiseDisp(edger.dgel_l)
# Running the tests
edger.et_l <- exactTest(edger.dgel_l)
# Getting the results and sorting them
edger.res_l <- topTags(edger.et_l, n=nrow, sort.by="none")
edger.res_l$table <- edger.res_l$table[order(rownames(edger.res_l$table)), ]

baySeq

# Setting up a 'mini' cluster using the snow library, using 4 cores on the current computer
cl <- makeCluster(c(rep("localhost", 4)), type="SOCK")
# Setting up the data object and description
bayseq.cd_l <- new("countData", data=as.matrix(sums.count.data[,c(1:6, 13:18)]),
 replicates=c(rep("NPL", 6), rep("PL", 6)),
 groups=list(NDE=rep(1,12), DE=c(rep(1,6), rep(2,6))))
# Normalizing
libsizes(bayseq.cd_l) <- getLibsizes(bayseq.cd_l)
# Adding in annotation
bayseq.cd_l@annotation <- data.frame(name=rownames(sums.count.data))
# Running the Tests, getting results and sorting them
bayseq.cd_l <- getPriors.NB(bayseq.cd_l, samplesize=10000, estimation="QL", cl=cl)
bayseq.cd_l <- getLikelihoods.NB(bayseq.cd_l, pET='BIC', cl=cl)
bayseq.res_l <- topCounts(bayseq.cd_l, group="DE", number=nrow(sums.count.data))
bayseq.res_l <- bayseq.res_l[order(rownames(bayseq.res_l)), ]

Plots

tmp <- deseq2.sums.res_l
# The main plot
plot(tmp$baseMean, tmp$log2FoldChange, pch=20, cex=0.45, ylim=c(-3, 3), log="x", col="darkgray",
     main="MA-Plot of NPL vs PL with DESeq2 Significant Results (pval <= 0.05)",
     xlab="mean of normalized counts",
     ylab="Log2 Fold Change")
# Getting the significant points and plotting them again so they're a different color
tmp.sig <- deseq2.sums.res_l[!is.na(deseq2.sums.res_l$padj) & deseq2.sums.res_l$padj <= 0.05, ]
points(tmp.sig$baseMean, tmp.sig$log2FoldChange, pch=20, cex=0.45, col="red")
# 2 FC lines
abline(h=c(-1,1), col="blue")