Organizing Code With Structured Programming

Use the code that you worked on in Homework #8 (creating fake data sets) , and re-organize it following the principles of structured programming. Do all the work in a single chunk in your R markdown file, just as if you were writing a single R script. Start with all of your annotated functions, preliminary calls, and global variables. The program body should be only a few lines of code that call the appropriate functions and run them in the correct order. Make sure that the output from one function serves as the input to the next. You can either daisy-chain the functions or write separate lines of code to hold elements in temporary variables and pass the along.

#-------------
# FUNCTION get_data
# description: read in csv file
# inputs: csv file
# outputs: dataframe
###################################
library(ggplot2)
library(ggforce)
get_data <- function(file_name = NULL){
  
  if(is.null(file_name)){
    df <- data.frame(ID = 101:110, 
                     var_a = runif(10),
                     var_b = runif(10))
  } else {
    df <- read.table(file = file_name,
                     header = TRUE,
                     sep = ",",
                     stringsAsFactors = FALSE)
  }
  
  return(df)
} # end of get_data
#--------------------------------
# -------------------------------
# FUNCTION calculate
# description: fits an ANOVA model
# inputs: x and y vectors of numeric; must be the same length
# outputs: entire model summary from aov
###################################
calculations <- function(x_var = runif(10),
                            y_var = runif(10)){
  
  df <- data.frame(x_var, y_var)
  aov_model <- aov(y_var ~ x_var, data = df)
  
  
  return(summary(aov_model))
  
} # end of calculate
#--------------------------------
# -------------------------------
# FUNCTION summarize_output
# description: pull elements from model summary list
# inputs: list from summary call of anova
# outputs: vector of regression residuals
###################################
summarize_output <- function(z = NULL){
  
  if(is.null(z)) {
    z <- summary(lm(runif(10) ~ runif(10)))
  }
  
  return(z[[1]][[5]][1])
  
} # end of summarize_output
#--------------------------------
# -------------------------------
# FUNCTION graph_results
# description: graph data and fitted OLS line
# inputs: x and y vectors of numeric. Must be same length.
# outputs: creates graph
###################################
graph_results <- function(x_var = runif(10),
                          y_var = runif(10)){
  
  df <- data.frame(x_var, y_var)
  p1 <- qplot(data = df, 
              x = x_var, 
              y = y_var,
              geom = c("boxplot"),
              fill = x_var)
  print(p1)
  message("Message: Boxplot created!")
  
  
} # end of graph_results
#----------------------------------

Structured programming script

# ------------------------------
# Structured programming for data simulation
# 
# ------------------------------
#

# load packages ------------------------------
library(ggplot2)

# source files ------------------------------
# source(myFunctions.R)
# for the purpose of this exercise functions appear above

# Global Variables ------------------------------
treedata <- read.csv("./Photosynthesis/Survey_Measurements_8.16.21.csv")
head(treedata)
##     ï..Date Obs   ID FIACode treatment   HHMMSS FTime EBal.    Photo      Cond
## 1 8/16/2021   1 14_3    BELE         0 10:08:59  32.5     0 12.53556 0.3481423
## 2 8/16/2021   2 14_3    BELE         0 10:09:00  33.0     0 12.54026 0.3479130
## 3 8/16/2021   3 14_3    BELE         0 10:09:01  34.0     0 12.52148 0.3475995
## 4 8/16/2021   4 14_3    BELE         0 10:09:02  35.0     0 12.47005 0.3481674
## 5 8/16/2021   5 14_3    BELE         0 10:09:03  36.0     0 12.44985 0.3476661
## 6 8/16/2021   6 14_4    BELE         0 10:12:44 257.5     0 14.78854 0.3582530
##        WUE       Ci   Trmmol      VpdL   CTleaf Area BLC_1 StmRat BLCond
## 1       NA 290.7187 3.115642 0.9432341 24.71848    6  1.42      1   2.84
## 2       NA 290.6274 3.116555 0.9440598 24.72474    6  1.42      1   2.84
## 3       NA 290.6968 3.116731 0.9448609 24.73230    6  1.42      1   2.84
## 4 35.81625 291.0692 3.116755 0.9434954 24.72663    6  1.42      1   2.84
## 5 35.80980 291.0892 3.115978 0.9444644 24.73212    6  1.42      1   2.84
## 6 41.27959 273.6998 3.338441 0.9837283 25.57676    6  1.42      1   2.84
##       Tair    Tleaf     TBlk     CO2R     CO2S     H2OR     H2OS     RH_R
## 1 24.88567 24.71848 24.84412 400.5698 359.5210 13.46277 22.61652 41.15604
## 2 24.88865 24.72474 24.84800 400.5547 359.4952 13.46456 22.62002 41.15431
## 3 24.89376 24.73230 24.85443 400.5295 359.5264 13.47055 22.62638 41.16000
## 4 24.89658 24.72663 24.85843 400.3872 359.5391 13.47389 22.62962 41.16315
## 5 24.90091 24.73212 24.86306 400.3324 359.5447 13.47658 22.63026 41.16066
## 6 25.56162 25.57676 25.54028 400.0373 352.0499 14.10086 23.89642 41.40522
##       RH_S     Flow     PARi       PARo    Press     CsMch     HsMch    CsMchSD
## 1 69.13927 199.6021 1999.822  224.99693 96.54293 0.7621966 0.0360828 0.05226203
## 2 69.13789 199.6224 1999.819  219.99118 96.54324 0.7621966 0.0360828 0.05226203
## 3 69.13616 199.6243 1999.749  204.68234 96.54314 0.7621966 0.0360828 0.05226203
## 4 69.13419 199.6273 1999.699  113.41531 96.54282 0.7621966 0.0360828 0.05226203
## 5 69.11817 199.6221 1999.659   32.73874 96.54263 0.7621966 0.0360828 0.05226203
## 6 70.16853 199.6005 1998.483 1178.16553 96.53922 0.7621966 0.0360828 0.05226203
##       HsMchSD    CrMchSD     HrMchSD StableF   BLCslope BLCoffst f_parin
## 1 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
## 2 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
## 3 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
## 4 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
## 5 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
## 6 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
##   f_parout alphaK Status       fda       Trans   Tair_K  Twall_K  R.W.m2.
## 1        0   0.16 111115 0.3326702 0.003115642 297.8685 298.0357 319.9715
## 2        0   0.16 111115 0.3327039 0.003116555 297.8747 298.0387 319.9710
## 3        0   0.16 111115 0.3327071 0.003116731 297.8823 298.0438 319.9598
## 4        0   0.16 111115 0.3327122 0.003116755 297.8766 298.0466 319.9519
## 5        0   0.16 111115 0.3327035 0.003115978 297.8821 298.0509 319.9454
## 6        0   0.16 111115 0.3326675 0.003338441 298.7268 298.7116 319.7573
##      Tl.Ta  SVTleaf    h2o_i   h20diff    CTair   SVTair  CndTotal   vp_kPa
## 1 2.097678 3.126699 32.38662  9.770100 24.80207 3.142349 0.3101255 2.183465
## 2 2.096773 3.127870 32.39864  9.778621 24.80670 3.143218 0.3099435 2.183810
## 3 2.096220 3.129283 32.41331  9.786929 24.81303 3.144406 0.3096946 2.184422
## 4 2.097233 3.128223 32.40244  9.772817 24.81161 3.144139 0.3101454 2.184727
## 5 2.097384 3.129250 32.41314  9.782874 24.81652 3.145061 0.3097475 2.184785
## 6 1.957674 3.290670 34.08635 10.189935 25.56919 3.289192 0.3181232 2.306941
##        VpdA    CndCO2    Ci_Pa     Ci.Ca    RHsfc    C2sfc     AHs.Cs
## 1 0.9588844 0.1969195 28.06683 0.8086278 72.36718 353.5622 0.02565782
## 2 0.9594071 0.1968022 28.05811 0.8084319 72.35114 353.5342 0.02566378
## 3 0.9599844 0.1966416 28.06478 0.8085548 72.33715 353.5743 0.02561748
## 4 0.9594113 0.1969324 28.10064 0.8095620 72.37280 353.6115 0.02552215
## 5 0.9602756 0.1966757 28.10252 0.8096051 72.34899 353.6267 0.02547134
## 6 0.9822506 0.2020810 26.42277 0.7774462 72.66159 345.0201 0.03114481
xcol <- 4 # column 4 is species
ycol <- 9 # column 9 is Amax

# Program body ------------------------------
x <- treedata[,xcol] # predictor variable
y <- treedata[,ycol] # response variable

x1 <- calculations(x_var = x, y_var = y)
print(x1)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## x_var         2   1128   564.0   46.13 <2e-16 ***
## Residuals   237   2898    12.2                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 8 observations deleted due to missingness
x2 <- summarize_output(x1)
print(x2)
## [1] 1.201706e-17
graph_results(x_var = x, y_var = y)
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Message: Boxplot created!

Once your code is up and working, modify your program to do something else: record a new summary variable, code a new statistical analysis, or create a different set of random variables or output graph. Do not rewrite any of your existing functions. Instead, copy them, rename them, and then modify them to do new things. Once your new functions are written, add some more lines of program code, calling a mixture of your previous functions and your new functions to get the job done.

# New Functions ------------------------------


# -------------------------------
# FUNCTION summarize_output2
# description: show treatment differences from anova
# inputs: list from summary call of anova
# outputs: contrast table
###################################
summarize_output2 <- function(x_var = runif(10), y_var = runif(10)){
  
  df <- data.frame(x_var, y_var)
  cont <- summary.lm(aov(y_var ~ x_var, data = df))
  
  return(cont[4])
  
} # end of summarize_output2
#--------------------------------

# -------------------------------
# FUNCTION graph_results2
# description: graph data 
# inputs: x and y vectors of numeric. Must be same length.
# outputs: creates graph
###################################
graph_results2 <- function(x_var = runif(10),
                          y_var = runif(10)){
  
  df <- data.frame(x_var, y_var)
  p1 <- ggplot(data = df, aes(x = x_var, y = y_var, color = x_var)) + 
              geom_bar() 

  print(p1)
  message("Message: Sinaplot created!")
  
  
} # end of graph_results
#----------------------------------
# ------------------------------
# Structured programming 2
# 
# ------------------------------
#

# load packages ------------------------------
library(ggplot2)
library(ggforce)

# source files ------------------------------
# source(myFunctions.R)
# for the purpose of this exercise functions appear above


# Global Variables ------------------------------
treedata <- read.csv("./Photosynthesis/Survey_Measurements_8.16.21.csv")
head(treedata)
##     ï..Date Obs   ID FIACode treatment   HHMMSS FTime EBal.    Photo      Cond
## 1 8/16/2021   1 14_3    BELE         0 10:08:59  32.5     0 12.53556 0.3481423
## 2 8/16/2021   2 14_3    BELE         0 10:09:00  33.0     0 12.54026 0.3479130
## 3 8/16/2021   3 14_3    BELE         0 10:09:01  34.0     0 12.52148 0.3475995
## 4 8/16/2021   4 14_3    BELE         0 10:09:02  35.0     0 12.47005 0.3481674
## 5 8/16/2021   5 14_3    BELE         0 10:09:03  36.0     0 12.44985 0.3476661
## 6 8/16/2021   6 14_4    BELE         0 10:12:44 257.5     0 14.78854 0.3582530
##        WUE       Ci   Trmmol      VpdL   CTleaf Area BLC_1 StmRat BLCond
## 1       NA 290.7187 3.115642 0.9432341 24.71848    6  1.42      1   2.84
## 2       NA 290.6274 3.116555 0.9440598 24.72474    6  1.42      1   2.84
## 3       NA 290.6968 3.116731 0.9448609 24.73230    6  1.42      1   2.84
## 4 35.81625 291.0692 3.116755 0.9434954 24.72663    6  1.42      1   2.84
## 5 35.80980 291.0892 3.115978 0.9444644 24.73212    6  1.42      1   2.84
## 6 41.27959 273.6998 3.338441 0.9837283 25.57676    6  1.42      1   2.84
##       Tair    Tleaf     TBlk     CO2R     CO2S     H2OR     H2OS     RH_R
## 1 24.88567 24.71848 24.84412 400.5698 359.5210 13.46277 22.61652 41.15604
## 2 24.88865 24.72474 24.84800 400.5547 359.4952 13.46456 22.62002 41.15431
## 3 24.89376 24.73230 24.85443 400.5295 359.5264 13.47055 22.62638 41.16000
## 4 24.89658 24.72663 24.85843 400.3872 359.5391 13.47389 22.62962 41.16315
## 5 24.90091 24.73212 24.86306 400.3324 359.5447 13.47658 22.63026 41.16066
## 6 25.56162 25.57676 25.54028 400.0373 352.0499 14.10086 23.89642 41.40522
##       RH_S     Flow     PARi       PARo    Press     CsMch     HsMch    CsMchSD
## 1 69.13927 199.6021 1999.822  224.99693 96.54293 0.7621966 0.0360828 0.05226203
## 2 69.13789 199.6224 1999.819  219.99118 96.54324 0.7621966 0.0360828 0.05226203
## 3 69.13616 199.6243 1999.749  204.68234 96.54314 0.7621966 0.0360828 0.05226203
## 4 69.13419 199.6273 1999.699  113.41531 96.54282 0.7621966 0.0360828 0.05226203
## 5 69.11817 199.6221 1999.659   32.73874 96.54263 0.7621966 0.0360828 0.05226203
## 6 70.16853 199.6005 1998.483 1178.16553 96.53922 0.7621966 0.0360828 0.05226203
##       HsMchSD    CrMchSD     HrMchSD StableF   BLCslope BLCoffst f_parin
## 1 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
## 2 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
## 3 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
## 4 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
## 5 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
## 6 0.008292885 0.02990977 0.009384986       1 -0.2195652 2.737391       1
##   f_parout alphaK Status       fda       Trans   Tair_K  Twall_K  R.W.m2.
## 1        0   0.16 111115 0.3326702 0.003115642 297.8685 298.0357 319.9715
## 2        0   0.16 111115 0.3327039 0.003116555 297.8747 298.0387 319.9710
## 3        0   0.16 111115 0.3327071 0.003116731 297.8823 298.0438 319.9598
## 4        0   0.16 111115 0.3327122 0.003116755 297.8766 298.0466 319.9519
## 5        0   0.16 111115 0.3327035 0.003115978 297.8821 298.0509 319.9454
## 6        0   0.16 111115 0.3326675 0.003338441 298.7268 298.7116 319.7573
##      Tl.Ta  SVTleaf    h2o_i   h20diff    CTair   SVTair  CndTotal   vp_kPa
## 1 2.097678 3.126699 32.38662  9.770100 24.80207 3.142349 0.3101255 2.183465
## 2 2.096773 3.127870 32.39864  9.778621 24.80670 3.143218 0.3099435 2.183810
## 3 2.096220 3.129283 32.41331  9.786929 24.81303 3.144406 0.3096946 2.184422
## 4 2.097233 3.128223 32.40244  9.772817 24.81161 3.144139 0.3101454 2.184727
## 5 2.097384 3.129250 32.41314  9.782874 24.81652 3.145061 0.3097475 2.184785
## 6 1.957674 3.290670 34.08635 10.189935 25.56919 3.289192 0.3181232 2.306941
##        VpdA    CndCO2    Ci_Pa     Ci.Ca    RHsfc    C2sfc     AHs.Cs
## 1 0.9588844 0.1969195 28.06683 0.8086278 72.36718 353.5622 0.02565782
## 2 0.9594071 0.1968022 28.05811 0.8084319 72.35114 353.5342 0.02566378
## 3 0.9599844 0.1966416 28.06478 0.8085548 72.33715 353.5743 0.02561748
## 4 0.9594113 0.1969324 28.10064 0.8095620 72.37280 353.6115 0.02552215
## 5 0.9602756 0.1966757 28.10252 0.8096051 72.34899 353.6267 0.02547134
## 6 0.9822506 0.2020810 26.42277 0.7774462 72.66159 345.0201 0.03114481
xcol <- 4 # column 4 is species
ycol <- 9 # column 9 is Amax

# Program body ------------------------------
x <- treedata[,xcol] # predictor variable
y <- treedata[,ycol] # response variable

x1 <- calculations(x_var = x, y_var = y)
print(x1)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## x_var         2   1128   564.0   46.13 <2e-16 ***
## Residuals   237   2898    12.2                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 8 observations deleted due to missingness
x2 <- summarize_output(x1)
print(x2)
## [1] 1.201706e-17
graph_results(x_var = x, y_var = y)
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).
## Message: Boxplot created!

# anova summary
x3 <- calculations(x_var = x, y_var = y)
print(x3)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## x_var         2   1128   564.0   46.13 <2e-16 ***
## Residuals   237   2898    12.2                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 8 observations deleted due to missingness
# p-value
x4 <- summarize_output(x3)
print(x4)
## [1] 1.201706e-17
# contrast - Which treatments are different than control?
x5 <- summarize_output2(x_var = x, y_var = y)
print(x5)
## $coefficients
##                Estimate Std. Error    t value     Pr(>|t|)
## (Intercept)  8.76313824  0.4037730 21.7031310 3.009928e-58
## x_varCADE   -0.08215093  0.5539720 -0.1482944 8.822365e-01
## x_varQURU    4.55485689  0.5620282  8.1043205 2.822672e-14