R Code Examples - RNA-Seq


For each example, I start with a data frame of count data that looks like the following.

NPL32
NPL34
NPL44
NPL46
NPL56
NPL61
PL20
PL31
PL43
PL55
PL59
PL62
comp0_c0_seq1
1073920
1228123
5254066
1371825
967934
677390
232806
1123876
2613486
630659
1231571
229342
comp1_c0_seq1
1918518
1726845
4206394
1314385
615831
939729
343003
1570057
1481815
1227947
1273835
420144
comp2_c0_seq1
171037
243930
712047
646413
469036
166717
129706
629589
1289674
564959
332828
116855
comp5_c0_seq1
147703
41802
781225
261413
299237
101581
73395
175481
433382
79492
304055
97796
comp5_c1_seq1
121391
43070
656968
231221
305144
81390
50360
152298
430000
77287
252271
62325
comp6_c0_seq1
342865
294385
638089
228506
102413
290270
121084
301103
341995
234032
224490
213252

DESeq

# Getting the data and setting up the conditional factor vector
deseq.sums.countData_l <- sums.count.data[,c(1:6, 13:18)]
deseq.sums.conds_l <- factor(c(rep("NPL", 6), rep("PL", 6)))
# Creating the DESeq data object
deseq.sums.cds_l <- newCountDataSet(countData = deseq.sums.countData_l, conditions = deseq.sums.conds_l)
# Normalizing
deseq.sums.cds_l <- estimateSizeFactors(deseq.sums.cds_l)
# Doing the variance calculation
deseq.sums.cds_l <- estimateDispersions(deseq.sums.cds_l)
# Running the test and getting the results
deseq.sums.res_l <- nbinomTest(deseq.sums.cds_l, "NPL", "PL")

DESeq2

# Extracting the columns I want
deseq2.sums.countData_l <- sums.count.data[,c(1:6, 13:18)]
# Setting up the dataframe that describes the data
deseq2.sums.colData_l <- data.frame(condition=factor(c(rep("NPL", 6), rep("PL", 6))),
 type=factor(rep("single-read", 12)))
rownames(deseq2.sums.colData_l) <- colnames(deseq2.sums.countData_l)
# Creating the DESeq2 data object
deseq2.sums.dds_l <- DESeqDataSetFromMatrix(countData = deseq2.sums.countData_l,
 colData = deseq2.sums.colData_l,
 design = ~ condition)
# Running the analysis, getting the results and sorting them
deseq2.sums.dds_l <- DESeq(deseq2.sums.dds_l)
deseq2.sums.res_l <- results(deseq2.sums.dds_l)
deseq2.sums.res_l <- deseq2.sums.res_l[order(rownames(deseq2.sums.res_l)), ]

edgeR

# Creating the edgeR data object
edger.dgel_l <- DGEList(counts=deseq2.sums.countData_l, group=factor(c(rep(1,6), rep(2,6))))
# Normalizing the data
edger.dgel_l <- calcNormFactors(edger.dgel_l)
# Some other stuff I'm not sure about
edger.dgel_l <- estimateCommonDisp(edger.dgel_l)
edger.dgel_l <- estimateTagwiseDisp(edger.dgel_l)
# Running the tests
edger.et_l <- exactTest(edger.dgel_l)
# Getting the results and sorting them
edger.res_l <- topTags(edger.et_l, n=nrow, sort.by="none")
edger.res_l$table <- edger.res_l$table[order(rownames(edger.res_l$table)), ]

baySeq

# Setting up a 'mini' cluster using the snow library, using 4 cores on the current computer
cl <- makeCluster(c(rep("localhost", 4)), type="SOCK")
# Setting up the data object and description
bayseq.cd_l <- new("countData", data=as.matrix(sums.count.data[,c(1:6, 13:18)]),
 replicates=c(rep("NPL", 6), rep("PL", 6)),
 groups=list(NDE=rep(1,12), DE=c(rep(1,6), rep(2,6))))
# Normalizing
libsizes(bayseq.cd_l) <- getLibsizes(bayseq.cd_l)
# Adding in annotation
bayseq.cd_l@annotation <- data.frame(name=rownames(sums.count.data))
# Running the Tests, getting results and sorting them
bayseq.cd_l <- getPriors.NB(bayseq.cd_l, samplesize=10000, estimation="QL", cl=cl)
bayseq.cd_l <- getLikelihoods.NB(bayseq.cd_l, pET='BIC', cl=cl)
bayseq.res_l <- topCounts(bayseq.cd_l, group="DE", number=nrow(sums.count.data))
bayseq.res_l <- bayseq.res_l[order(rownames(bayseq.res_l)), ]

Plots

tmp <- deseq2.sums.res_l
# The main plot
plot(tmp$baseMean, tmp$log2FoldChange, pch=20, cex=0.45, ylim=c(-3, 3), log="x", col="darkgray",
     main="MA-Plot of NPL vs PL with DESeq2 Significant Results (pval <= 0.05)",
     xlab="mean of normalized counts",
     ylab="Log2 Fold Change")
# Getting the significant points and plotting them again so they're a different color
tmp.sig <- deseq2.sums.res_l[!is.na(deseq2.sums.res_l$padj) & deseq2.sums.res_l$padj <= 0.05, ]
points(tmp.sig$baseMean, tmp.sig$log2FoldChange, pch=20, cex=0.45, col="red")
# 2 FC lines
abline(h=c(-1,1), col="blue")