#Load in required packages for functions below
require(qpcR)
require(plyr)
require(ggplot2)
require(splitstackshape)

#Read in raw fluorescence data from 1st Actin replicate
rep1<-read.csv("CRAF4rawfluoro.csv", header = T)
#Remove blank first column entitled "X"
rep1$X<-NULL
#Rename columns so that qpcR package and appropriately handle the data
rep1<-rename(rep1, c("Cycle" = "Cycles", "A1" = "H_C_1", "A2" = "N_C_1",
                     "A3"= "S_C_1", "A4"="H_T_1", "A5"="N_T_1","A6"="S_T_1",
                     "A7"="NT_C_1","B1" = "H_C_2", "B2" = "N_C_2","B3"= "S_C_2",
                     "B4"="H_T_2", "B5"="N_T_2", "B6"="S_T_2","B7"="NT_C_2",
                     "C1" = "H_C_3", "C2" = "N_C_3","C3"= "S_C_3","C4"="H_T_3",
                     "C5"="N_T_3", "C6"="S_T_3", "C7"="NT_C_3","D1" = "H_C_4",
                     "D2" = "N_C_4","D3"= "S_C_4", "D4"="H_T_4", "D5"="N_T_4",
                     "D6"="S_T_4", "D7"="NT_C_4","E1" = "H_C_5", "E2" = "N_C_5",
                     "E3"= "S_C_5", "E4"="H_T_5", "E5"="N_T_5", "E6"="S_T_5",
                     "F1" = "H_C_6", "F2" = "N_C_6","F3"= "S_C_6", "F4"="H_T_6",
                     "F5"="N_T_6", "F6"="S_T_6","G1" = "H_C_7", "G2" = "N_C_7",
                     "G3"= "S_C_7", "G4"="H_T_7", "G5"="N_T_7", "G6"="S_T_7",
                     "H1" = "H_C_8", "H2" = "N_C_8","H3"= "S_C_8", "H4"="H_T_8",
                     "H5"="N_T_8", "H6"="S_T_8"))

#Run data through pcrbatch in qpcR package which analyzes fluorescence and produces efficiency and cycle threshold values
rep1ct<-pcrbatch(rep1, fluo=NULL)

#pcrbatch creates a file with each sample as an individual column in the dataframe. The problem with this is
#that I want to compare all the Ct (labelled sig.cpD2) and generate expression data for them but these values have to be
#in individual columns. To do this I must transpose the data and set the first row as the column names.
rep1res<-setNames(data.frame(t(rep1ct)),rep1ct[,1])
#Now I must remove the first row as it is a duplicate and will cause errors with future analysis
rep1res<-rep1res[-1,]

#since the sample names are now in the first column the column title is row.names. This makes analys hard based on the ability to call the first column.
#to eliminate this issue, I copied the first column into a new column called "Names"
rep1res$Names<-rownames(rep1res)

#Since each sample name contains information such as Population, Treatment, and Sample Number I want to separate out these factors
#into new columns so that I can run future analysis based on population, treatment, or both. Also note the "drop = F" this is so the original names column remains.
rep1res2<-cSplit_f(rep1res, splitCols=c("Names"), sep="_", drop = F)

#After splitting the names column into three new columns I need to rename them appropriately. 
rep1res2<-rename(rep1res2, c("Names_1"="Pop", "Names_2"="Treat", "Names_3"="Sample"))

#I also create a column with the target gene name. This isn't used in this analysis but will be helpful for future work.
rep1res2$Gene<-rep("CARM", length(rep1res2))

#In transposing the data frame, the column entries became factors which cannot be used for equations.
#to fix this, I set the entries for sig.eff (efficiency) and sig.cpD2 (Ct value) to numeric. Be aware, without the as.character function the factors will be transformed inappropriately.
rep1res2$sig.eff<-as.numeric(as.character(rep1res2$sig.eff))
rep1res2$sig.cpD2<-as.numeric(as.character(rep1res2$sig.cpD2))

#Now I plot the Ct values to see how they align without converting them to expression.
ggplot(rep1res2, aes(x=Names,y=sig.cpD2, fill=Pop))+geom_bar(stat="identity")

#Now I want to get expression information from my data set. qpcR has a way of doing this but its complicated and I'm not comfortable using it.
#Luckily there is an equation I can use to do it. The equation is expression = 1/(1+efficiency)^Ctvalue. I tried multiple ways to get this to work in R
#but it doesn't handle the complicated equation easily.
#To work around this, I created a function in R to run the equation and produce an outcome. x = efficiency argument, y=Ctvalue argument
expr<-function(x,y){
  newVar<-(1+x)^y
  1/newVar
}

#Now I run the data through the function and produce a useful expression value
rep1res2$expression<-expr(rep1res2$sig.eff, rep1res2$sig.cpD2)

#Graphing the expression values is a good way to examine the data quickly for errors that might have occurred. 
ggplot(rep1res2, aes(x=Names,y=expression, fill=Pop))+geom_bar(stat="identity")

#Before I'm able to compare the replicates I need to process the raw fluorescence from the second Actin run.
#To do this I perform all the same steps as the previous replicate.
rep2<-read.csv("CRAF5rawfluoro.csv", header = T)
rep2$X<-NULL
rep2<-rename(rep2, c("Cycle" = "Cycles", "A1" = "H_C_1", "A2" = "N_C_1",
                     "A3"= "S_C_1", "A4"="H_T_1", "A5"="N_T_1","A6"="S_T_1",
                     "A7"="NT_C_1","B1" = "H_C_2", "B2" = "N_C_2","B3"= "S_C_2",
                     "B4"="H_T_2", "B5"="N_T_2", "B6"="S_T_2","B7"="NT_C_2",
                     "C1" = "H_C_3", "C2" = "N_C_3","C3"= "S_C_3","C4"="H_T_3",
                     "C5"="N_T_3", "C6"="S_T_3", "C7"="NT_C_3","D1" = "H_C_4",
                     "D2" = "N_C_4","D3"= "S_C_4", "D4"="H_T_4", "D5"="N_T_4",
                     "D6"="S_T_4", "D7"="NT_C_4","E1" = "H_C_5", "E2" = "N_C_5",
                     "E3"= "S_C_5", "E4"="H_T_5", "E5"="N_T_5", "E6"="S_T_5",
                     "F1" = "H_C_6", "F2" = "N_C_6","F3"= "S_C_6", "F4"="H_T_6",
                     "F5"="N_T_6", "F6"="S_T_6","G1" = "H_C_7", "G2" = "N_C_7",
                     "G3"= "S_C_7", "G4"="H_T_7", "G5"="N_T_7", "G6"="S_T_7",
                     "H1" = "H_C_8", "H2" = "N_C_8","H3"= "S_C_8", "H4"="H_T_8",
                     "H5"="N_T_8", "H6"="S_T_8"))

rep2ct<-pcrbatch(rep2, fluo=NULL)

rep2res<-setNames(data.frame(t(rep2ct)),rep2ct[,1])
rep2res<-rep2res[-1,]

rep2res$Names<-rownames(rep2res)

rep2res2<-cSplit_f(rep2res, splitCols=c("Names"), sep="_", drop = F)

rep2res2<-rename(rep2res2, c("Names_1"="Pop", "Names_2"="Treat", "Names_3"="Sample"))

rep2res2$Gene<-rep("CRAF", length(rep2res2))

rep2res2$sig.eff<-as.numeric(as.character(rep2res2$sig.eff))
rep2res2$sig.cpD2<-as.numeric(as.character(rep2res2$sig.cpD2))


ggplot(rep2res2, aes(x=Names,y=sig.cpD2, fill=Pop))+geom_bar(stat="identity")

expr<-function(x,y){
  newVar<-(1+x)^y
  1/newVar
}

rep2res2$expression<-expr(rep2res2$sig.eff, rep2res2$sig.cpD2)

ggplot(rep2res2, aes(x=Names,y=expression, fill=Pop))+geom_bar(stat="identity")

#Now that I have Ct values, efficiencies and expression values for both replicates I can create a table of the differences between reps.
#To do this I create a data frame with a single formula that creates a column of values generated by subtracting the first run from the second.
repcomp<-as.data.frame(rep1res2$sig.cpD2-rep2res2$sig.cpD2)

#Now I need to add some Names for the samples to use with ggplot.Since the names column contains all the relevant information
#I copy only that column and run the split function on it again as well as the rename function. 
repcomp$Names<-rep1res2$Names
repcomp<-cSplit_f(repcomp, splitCols=c("Names"), sep="_", drop = F)

#To better address the difference column in ggplot I need to rename it something simple and short. 
repcomp<-rename(repcomp, c("rep1res2$sig.cpD2 - rep2res2$sig.cpD2"="rep.diff", "Names_1"="Pop", "Names_2"="Treat", "Names_3"="Sample"))

#Now I just run the data through ggplot to generate a bar graph exploring the differences between the two replicate in terms of Ct values.
ggplot(repcomp, aes(x=Names, y=rep.diff, fill=Pop))+geom_bar(stat="identity")

craf<-as.data.frame(cbind(rep1res2$expression,rep1res2$Names,rep1res2$Pop,rep1res2$Treat,rep2res2$expression))
craf<-rename(craf, c(V1="rep1.expr","V2"="name","V3"="pop","V4"="treat"
                                   ,"V5"="rep2.expr"))

craf$rep1.expr<-as.numeric(as.character(craf$rep1.expr))
craf$rep2.expr<-as.numeric(as.character(craf$rep2.expr))


craf$avgexpr<-rowMeans(craf[,c("rep1.expr","rep2.expr")],na.rm=F)

craf<-craf[which(craf$pop!=c("NT","**NT")),]

craf<-craf[which(craf$pop!=c("NT","**NT")),]

ggplot(craf, aes(x=treat,y=avgexpr, fill=pop))+geom_boxplot()
ggplot(craf, aes(x=name, y=avgexpr, fill=pop))+geom_bar(stat="identity")

ggplot(craf, aes(x=pop,y=avgexpr, fill=treat))+geom_boxplot()

fit<-aov(avgexpr~pop+treat+pop:treat,data=craf)
fit
TukeyHSD(fit)

fit2<-aov(avgexpr ~ pop, data=craf[which(craf$treat=="C"),])
fit2
TukeyHSD(fit2)

fit3<-aov(avgexpr~pop, data=craf[which(craf$treat=="T"),])
fit3
TukeyHSD(fit3)

fit4<-t.test(avgexpr~treat, data=craf[which(craf$pop=="H"),])
fit4

fit5<-t.test(avgexpr~treat, data=craf[which(craf$pop=="N"),])
fit5

fit6<-t.test(avgexpr~treat, data=craf[which(craf$pop=="S"),])
fit6