Creating Fake Data Sets To Explore Hypotheses Benefit of fake datasets: can be used for grant proposals to generate expected outcomes
I’m looking broadly at forest stand structure and physiological responses to adaptive silviculture treatements. For this assignment I’m looking at aboveground biomass in 3 treatments (resistance, resilience, and transition) as well as an uncut control. Aboveground biomass is expected to be lower in more heavily cut treatments and highest in control. Resistance and resilience have similar cuttings so these will have similar means
Treatments and control (n = 4) have 10 plots in 40 blocks (160 total). Resistance (RT): unifrom thinning and few trees removed Resilience (RL): variable thinning and more trees removed Transition (TR): Gap cuts and plantings, most trees removed Control (CT): control, uncut
Experiment began in 2016, means are based on means collected from the site for aboveground biomass along with simliar standard deviations Sample size and everything included in R code below with aboveground biomass means declining as treatments increase in cutting
# all treatments have 40 plots
# control treatment has plot mean of 150 Mg/ha, minimal variability
CT <- rnorm(40, mean = 150, sd = 15)
CT <- data.frame(CT, "CT")
colnames(CT) <- c("agb_Mg", "treatment")
# resistance treatment has lower mean, still low variability
RT <- rnorm(40, mean = 140, sd = 20)
RT <- data.frame(RT, "RT")
colnames(RT) <- c("agb_Mg", "treatment")
# resilience treatment has lower mean, and higher variability
RL <- rnorm(40, mean = 130, sd = 35)
RL <- data.frame(RL, "RL")
colnames(RL) <- c("agb_Mg", "treatment")
# transition has lowest biomass, and most variability
TR <- rnorm(40, mean = 120, sd = 50)
TR <- data.frame(TR, "TR")
colnames(TR) <- c("agb_Mg", "treatment")
id <- c(1:160)
my_data <- rbind(CT, RT, RL, TR)
my_data <- cbind(id, my_data)
head(my_data)
## id agb_Mg treatment
## 1 1 162.2274 CT
## 2 2 156.2427 CT
## 3 3 129.4865 CT
## 4 4 147.5943 CT
## 5 5 160.3571 CT
## 6 6 147.2376 CT
tail(my_data)
## id agb_Mg treatment
## 155 155 216.73820 TR
## 156 156 136.81583 TR
## 157 157 94.67706 TR
## 158 158 53.51138 TR
## 159 159 145.95982 TR
## 160 160 125.28147 TR
#categorical x (treatment type) and continuous y (aboveground biomass) requires ANOVA for analysis
m1 <- aov(agb_Mg ~ treatment, data = my_data)
summary(m1)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 3 6175 2058.4 2.101 0.102
## Residuals 156 152820 979.6
library(ggplot2)
#ggplot(my_data, aes(x = treatment, y =agb_Mg, fill =treatment)) + geom_boxplot() #this figure works but it's producing an error when knitting regarding an error in loading png_will fix
#ggplot(data = my_data, aes(x = treatment, y = agb_Mg, fill = treatment)) + geom_boxplot()
#Go back and run data to visualize how variable the random numbers may be
#results are not incredibly variable, all means fall in similar range 5/5 times
# Converging on mean of 155
CT <- rnorm(40, mean = 170, sd = 15)
CT <- data.frame(CT, "CT")
colnames(CT) <- c("agb_Mg", "treatment")
RT <- rnorm(40, mean = 155, sd = 20)
RT <- data.frame(RT, "RT")
colnames(RT) <- c("agb_Mg", "treatment")
RL <- rnorm(40, mean = 155, sd = 35)
RL <- data.frame(RL, "RL")
colnames(RL) <- c("agb_Mg", "treatment")
TR <- rnorm(40, mean = 140, sd = 50)
TR <- data.frame(TR, "TR")
colnames(TR) <- c("agb_Mg", "treatment")
id <- c(1:160)
my_data <- rbind(CT, RT, RL, TR)
my_data <- cbind(id, my_data)
m1 <- aov(agb_Mg ~ treatment, data = my_data)
summary(m1)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 3 25432 8477 7.669 8.18e-05 ***
## Residuals 156 172449 1105
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#19/20 times, results are signficant
# Converging on mean of 155 (range of 5)
CT <- rnorm(40, mean = 160, sd = 15)
CT <- data.frame(CT, "CT")
colnames(CT) <- c("agb_Mg", "treatment")
RT <- rnorm(40, mean = 155, sd = 20)
RT <- data.frame(RT, "RT")
colnames(RT) <- c("agb_Mg", "treatment")
RL <- rnorm(40, mean = 155, sd = 35)
RL <- data.frame(RL, "RL")
colnames(RL) <- c("agb_Mg", "treatment")
TR <- rnorm(40, mean = 145, sd = 50)
TR <- data.frame(TR, "TR")
colnames(TR) <- c("agb_Mg", "treatment")
id <- c(1:160)
my_data <- rbind(CT, RT, RL, TR)
my_data <- cbind(id, my_data)
m1 <- aov(agb_Mg ~ treatment, data = my_data)
summary(m1)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 3 6107 2036 1.953 0.123
## Residuals 156 162574 1042
#difference is too small to detect for this session, 1/5 times generated significant results
#
####Difference of 15 between groups is ideal for generating significant results
# drop down to sample size of 30
CT <- rnorm(30, mean = 170, sd = 15)
CT <- data.frame(CT, "CT")
colnames(CT) <- c("agb_Mg", "treatment")
RT <- rnorm(30, mean = 155, sd = 20)
RT <- data.frame(RT, "RT")
colnames(RT) <- c("agb_Mg", "treatment")
RL <- rnorm(30, mean = 155, sd = 35)
RL <- data.frame(RL, "RL")
colnames(RL) <- c("agb_Mg", "treatment")
TR <- rnorm(30, mean = 140, sd = 50)
TR <- data.frame(TR, "TR")
colnames(TR) <- c("agb_Mg", "treatment")
id <- c(1:120)
my_data <- rbind(CT, RT, RL, TR)
my_data <- cbind(id, my_data)
m1 <- aov(agb_Mg ~ treatment, data = my_data)
summary(m1)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 3 17359 5786 4.685 0.00399 **
## Residuals 116 143267 1235
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Significant 9/10 tries
# drop down to sample size of 20
CT <- rnorm(20, mean = 170, sd = 15)
CT <- data.frame(CT, "CT")
colnames(CT) <- c("agb_Mg", "treatment")
RT <- rnorm(20, mean = 155, sd = 20)
RT <- data.frame(RT, "RT")
colnames(RT) <- c("agb_Mg", "treatment")
RL <- rnorm(20, mean = 155, sd = 35)
RL <- data.frame(RL, "RL")
colnames(RL) <- c("agb_Mg", "treatment")
TR <- rnorm(20, mean = 140, sd = 50)
TR <- data.frame(TR, "TR")
colnames(TR) <- c("agb_Mg", "treatment")
id <- c(1:80)
my_data <- rbind(CT, RT, RL, TR)
my_data <- cbind(id, my_data)
m1 <- aov(agb_Mg ~ treatment, data = my_data)
summary(m1)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 3 15777 5259 4.107 0.00934 **
## Residuals 76 97325 1281
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#signficant 8/10
# drop down to sample size of 10
CT <- rnorm(10, mean = 170, sd = 15)
CT <- data.frame(CT, "CT")
colnames(CT) <- c("agb_Mg", "treatment")
RT <- rnorm(10, mean = 155, sd = 20)
RT <- data.frame(RT, "RT")
colnames(RT) <- c("agb_Mg", "treatment")
RL <- rnorm(10, mean = 155, sd = 35)
RL <- data.frame(RL, "RL")
colnames(RL) <- c("agb_Mg", "treatment")
TR <- rnorm(10, mean = 140, sd = 50)
TR <- data.frame(TR, "TR")
colnames(TR) <- c("agb_Mg", "treatment")
id <- c(1:40)
my_data <- rbind(CT, RT, RL, TR)
my_data <- cbind(id, my_data)
m1 <- aov(agb_Mg ~ treatment, data = my_data)
summary(m1)
## Df Sum Sq Mean Sq F value Pr(>F)
## treatment 3 11023 3674 3.085 0.0394 *
## Residuals 36 42879 1191
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#significant 3/10 times
#
#minimum sample size of 20 for this particular dataset