########## # This script is designed to be run as part of the R Studio Project 'titrator.Rproj' found in the top level of this repo. # It requires the file: # - daily_calibration_log.csv # This is a script designed to parse out sea water sample titration data # exported from LabX 2017 (v. 8.0.0; Mettler Toledo) with the following settings: ### LabX Method ### # - Dual endpoint titration method (see LabX_method_files folder in this repo) ### Export Method ### # - Simple CSV format # - UTF-8 file encoding # - Raw Data # - Table of Measured Values # The output/product consists of the following: # - A two column data frame with sample names and corresponding total alkalinty values (calculated using the seacarb library at() function) ########## # Load necessary libraries library(seacarb) library(tidyverse) # Load file ## Enter path to desired titration data file. # data_file <- 'data/titration_data/sample_data/2018-04-03T09_12_45_pH_TA_titration_T363.csv' # # # Vector of salinity values (UNITS NEEDED) # ### Manually enter a comma separated list of values which match the order of the samples in data_file # salinities <- c(26.5, 26.4, 26.5, 26.5, 26.5, 26.5) # # # # # Extract date from filename # ## Format: yyyymmdd # data_date <- data_file %>% basename() %>% substr(1, 10) %>% gsub("-", "", .) %>% as.numeric() # # # Load calibration log file # calibration_daily_log <- read.csv(file = "data/cal_data/daily_calibration_log.csv") # # # # # # # Acid titrant density function. # # Requires sample temperature. # # Batch A10 density function - provided with titrant documentation # A10_density <- function(temperature) { # 1.02882 - (0.0001067*temperature) - (0.0000041*(temperature)^2) # } # A10_concentration <- 0.100215 #mol/kg # # # # Extract calibration voltages, based on sample data file run date. # pH3.0 <- calibration_daily_log %>% filter(date == data_date) %>% select(mean_E_pH3.0.mV.) %>% .[1,1] # pH3.5 <- calibration_daily_log %>% filter(date == data_date) %>% select(mean_E_pH3.5.mV.) %>% .[1,1] # # # # mols to umols conversion # # mol_to_umol <- 1000000 # # # # Column headers # # V is volumen in mL # # t is time in seconds # # E is voltage in mV # # T is temperature in C # # dV/dT is change in voltage divided by change in temperature # # S is salinity in UNITS NEEDED # # weight is weight in grams # headers <- c("V", "t", "E", "T", "dV/dT", "S", "weight") # # # ### Read data in as csv table that handles issue of having more columns in bottom portion of file than in top portion. # # Sets file encoding to rm weird characters # # Sets number of columns and assigns column names (V#) based on total number of fields detected in the file. # sample_data <- read.table(data_file, header = FALSE, stringsAsFactors = FALSE, na.strings = "NaN", fileEncoding="UTF-8-BOM", sep = ",", col.names = paste0("V",seq_len(max(count.fields(data_file, sep = ',')))), fill = TRUE) # # # Remove last two columns # sample_data <- sample_data[, -c(6,7)] # # # Pulls total sample number from Row 2, Col. 2, position 11. # # Converts from string to number. # # Data export must be Raw Data & Total Measured Values. # total_samples <- as.numeric(sample_data[2,2] %>% substr(11,11)) # # # ### Extract sample names # # # Identifies rows starting with "Scope" in column 1 # sample_name_positions <- grep("^Scope", sample_data$V1) # # # Subsets the entire data set based on a subset of sample_name_positions. # # Uses the length of the sample_name_positions vector divide by two because there are two entries per sample in the dataset. # sample_list <- sample_data[sample_name_positions[1:(length(sample_name_positions)/2)], 2] # # # Pulls out the actual sample names using the number of characters, minus 1 to get rid of ending ")" in cells, as the stop value for substr. # # Stores as a list, which will be useful for assigning data to each sample name later on. # sample_names <- substr(sample_list, 14, as.numeric(nchar(sample_list))-1) # # # Initialize list for storing sample data. # sample_data_list <- list() # # # ### Extract samples weights # # # Pulls the weight field by searching for rows with "Sample size". # weights_with_units <- sample_data[grep("^Sample size", sample_data$V1), 2] # # # Determines the string length by converting to characters and counting the characters. # # Uses grep to search for rows in column 1 that begin with "Sample size". # # Subtracts two from character length to account for "g" at end of entry. # weight_char_counts <- weights_with_units %>% # nchar() %>% # as.numeric() - 2 # # # Removes the last two characters from the weight field (g) # # of each entry in the weights_with_units vector. # sample_weights <- as.numeric(substr(weights_with_units,1,weight_char_counts)) # # # ### Parse out necessary info from two-part titration # # # Identify rows that contain "TitrationEP1" text in column 2 # EP1_titrations_rows <- grep("^TitrationEP1", sample_data$V2) # # Identify rows that contain "TitrationEP2" text in column 2 # EP2_titrations_rows <- grep("^TitrationEP2", sample_data$V2) # # # Create list of endpoint 1 (EP1) titrations # # Will be used to store EP1 final volumes # EP1_Vf <- list() # for (row in 1:length(EP1_titrations_rows)){ # EP1_Vf[[row]] <- paste("EP1_Vf_", row, sep = "") # } # # # Pull out final EP1 volumes # # Final EP1 volumes are the row before the beginning of each EP2 titration data set; thus, subtract "1" from each EP2 titration row value # for (item in 1:length(EP1_titrations_rows)){ # EP1_Vf[[item]]<- sample_data[(EP2_titrations_rows[item]-1), 1] # } # # #Convert EP1_Vf values to numeric. # EP1_Vf <- sapply(EP1_Vf, as.numeric) # # # ### Parse out EP2 data. # # # Beginning of data == EP2 row#+2 # # End of data == the next EP1 titration row - 2 # # UNLESS # # Last entry - which selects to end of file (e.g. tail(sample_data, (nrow(sample_data)) # for (item in 1:length(EP2_titrations_rows)){ # if (item == length(EP2_titrations_rows)){ # sample_data_list[[item]]<- tail(sample_data, (nrow(sample_data) - (EP2_titrations_rows[item]+1))) # } else { # sample_data_list[[item]]<- sample_data[(EP2_titrations_rows[item]+2):(EP1_titrations_rows[item+1]-2),] # } # } # # # # Convert all data frames in sample_data_list to numeric # # Add column names (headers) to each data frame in sample_data_list list # for (item in 1:length(sample_data_list)){ # sample_data_list[[item]]$S <- salinities[item] # sample_data_list[[item]]$weight <- sample_weights[item] # sample_data_list[[item]] <- as.data.frame(sapply(sample_data_list[[item]], as.numeric)) # colnames(sample_data_list[[item]]) <- headers # } # # # # Determine total acid added to each sample # # First, start to loop through each data frame in the sample_data_list # # For each data frame: # # - set acid volume # # - calculate the acid added in the final titration # # - while loop to: # # -- calculate cumulative acid added at each titration endpoint # # - determine final cumulative acid amount and assign to last row of data frame # # -- write output file for each sample to current directory # for (item in 1:length(sample_data_list)){ # total_acid_vol <- EP1_Vf[[item]] # final_acid_addition <- sample_data_list[[item]][nrow(sample_data_list[[item]]), "V"] - sample_data_list[[item]][(nrow(sample_data_list[[item]]) - 1), "V"] # row <- 1 # while (row < nrow(sample_data_list[[item]])){ # total_acid_vol <- total_acid_vol + ((sample_data_list[[item]][row+1, "V"] - sample_data_list[[item]][row, "V"])) # sample_data_list[[item]][row, "V"] <- total_acid_vol # row <- row + 1 # } # sample_data_list[[item]][nrow(sample_data_list[[item]]), "V"] <- total_acid_vol + final_acid_addition # } # # # # # Creates function that accepts a data frame, salinity values, and sample weights. # # Uses dplyr library to filter data for use in seacarb library: # # temperature data (T) and convert to vector (.$T) # # potential data (T) and convert to vector (.$E) # # volume data (V) and convert to vector (.$V) # # Pass data to seacarb library at() function. # TA_calcs <- function(df, salinities, sample_weights) { # T_data <- df %>% filter(E <= pH3.0 & E >= pH3.5) %>% select(T) %>% .$T # E_data <- df %>% filter(E <= pH3.0 & E >= pH3.5) %>% select(E) %>% .$E # volume_data <- df %>% filter(E <= pH3.0 & E >= pH3.5) %>% select(V) %>% .$V # mol_to_umol*(at(S=salinities, T=T_data, C=A10_concentration, d=A10_density(mean(T_data)), weight=sample_weights, E=E_data, volume=volume_data)) # } # # # Creates a numeric vector. # # Uses mapply() function to run the the TA_calcs() function, while supplying: # # - a list of dataframes (sample_data_list) # # - a vector of salinites (salinities) # # - a vector of sample weights (sample_weights) # TA_values <- mapply(TA_calcs, sample_data_list, salinities, sample_weights) # # # Creates a two column data frame. # TA_df <- data.frame(sample_names, TA_values)