---
title: "R Notebook"
output: html_notebook
---
```{r}
library(forestplot)
library(readr)
data <- read_delim("~/Documents/Roberts Lab/geoduck transcriptome/Geoduck-transcriptome_v3_bigtable_v08.tab.txt", "\t", escape_double = FALSE, trim_ws = TRUE)
GOslim_bin <- read_delim("~/Documents/Roberts Lab/geoduck transcriptome/GOslim-bin.txt",
"\t", escape_double = FALSE, trim_ws = TRUE)
```
```{r}
data_merged <- merge(data, GOslim_bin, by.x = "ContigID", by.y = "Column1", all = FALSE)
```
```{r}
#install.packages("mixtools")
#install.packages("mclust")
library("mixtools")
library("mclust")
data_cpg <- data_merged[,4]
data_cpg <- data_cpg[data_cpg >= 0.001 & data_cpg <= 1.5]
model <- normalmixEM(data_cpg, k = 2)
plot(model, which = 2, col2 = c("blue", "red"), xlab2 = " ", main2 = "Panopea generosa", font.main = 3)
lines(density(data_cpg), lty=2, lwd=2)
model.mclust <- Mclust(data_cpg)
summary(model.mclust)
model.mclust.1 <- Mclust(data_cpg, G = 1)
unimodal <- logLik(model.mclust.1)
bimodal <- logLik(model.mclust)
difference <- bimodal[1] - unimodal[1]
p.val <- 1- pchisq(difference, df = 3)
data_merged$GOSlim_bin <- as.factor(data_merged$GOSlim_bin)
GOSlim_types <- levels(data_merged$GOSlim_bin)
```
```{r}
for(i in 1:length(GOSlim_types)) {
hist(data_cpg[data_merged$GOSlim_bin == GOSlim_types[i]], main = GOSlim_types[i], xlab = "CpG O/E", xlim = range(0.01, 1.5), breaks = 15)
}
```