library(ggplot2) # for graphics
library(MASS) # for maximum likelihood estimation
# quick and dirty, a truncated normal distribution to work on the solution set
z <- rnorm(n=3000,mean=0.2)
z <- data.frame(1:3000,z)
names(z) <- list("ID","myVar")
z <- z[z$myVar>0,]
str(z)
## 'data.frame': 1748 obs. of 2 variables:
## $ ID : int 1 4 5 6 12 13 14 15 17 18 ...
## $ myVar: num 0.875 0.161 1.478 1.809 0.556 ...
summary(z$myVar)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.002728 0.352746 0.719894 0.861014 1.274544 4.520947
p1 <- ggplot(data=z, aes(x=myVar, y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
p1 <- p1 + geom_density(linetype="dotted",size=0.75)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
normPars <- fitdistr(z$myVar,"normal")
print(normPars)
## mean sd
## 0.86101383 0.63464353
## (0.01517956) (0.01073357)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 0.861 0.635
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 0.0152 0.0107
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 0.00023 0 0 0.000115
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 1748
## $ loglik : num -1686
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"] # note structure of getting a named attribute
## mean
## 0.8610138
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$myVar),len=length(z$myVar))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$myVar), args = list(mean = meanML, sd = sdML))
p1 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
expoPars <- fitdistr(z$myVar,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$myVar), args = list(rate=rateML))
p1 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$myVar), args = list(min=min(z$myVar), max=max(z$myVar)))
p1 + stat + stat2 + stat3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
gammaPars <- fitdistr(z$myVar,"gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$myVar), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
pSpecial <- ggplot(data=z, aes(x=myVar/(max(myVar + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=z$myVar/max(z$myVar + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$myVar), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
z <- read.csv("Freeman_Data.csv")
head(z)
## Date Obs Order ID species treatment HHMMSS FTime EBal. Photo
## 1 6/17/2021 61 61 1_9 CADE 1 11:03:33 3543.5 0 4.721591
## 2 6/17/2021 62 62 1_9 CADE 1 11:03:33 3544.0 0 4.705655
## 3 6/17/2021 63 63 1_9 CADE 1 11:03:34 3544.5 0 4.729064
## 4 6/17/2021 64 64 1_9 CADE 1 11:03:34 3545.0 0 4.741632
## 5 6/17/2021 65 65 1_9 CADE 1 11:03:35 3545.5 0 4.743628
## 6 6/9/2021 71 301 1_9 CADE 1 11:36:26 5690.5 0 9.568417
## Cond intr_WUE Ci Trmmol VpdL CTleaf Area BLC_1 StmRat
## 1 0.1697698 27.81172 340.0861 1.856931 1.084185 18.63775 6 1.42 0.5
## 2 0.1696595 27.73588 340.2325 1.855420 1.083972 18.63430 6 1.42 0.5
## 3 0.1693684 27.92176 339.9058 1.852310 1.083913 18.63184 6 1.42 0.5
## 4 0.1691572 28.03092 339.7150 1.849580 1.083602 18.62778 6 1.42 0.5
## 5 0.1689852 28.07126 339.6510 1.847782 1.083591 18.62634 6 1.42 0.5
## 6 0.2922992 32.73501 323.9740 3.052946 1.073363 23.56485 6 1.42 1.0
## BLCond Tair Tleaf TBlk CO2R CO2S H2OR H2OS
## 1 2.84 18.39852 18.63775 17.49838 400.1588 393.6112 9.058554 11.26330
## 2 2.84 18.39836 18.63430 17.49829 400.1564 393.6288 9.057750 11.26065
## 3 2.84 18.39805 18.63184 17.49820 400.1614 393.6073 9.058558 11.25777
## 4 2.84 18.39771 18.62778 17.49801 400.1612 393.5934 9.059276 11.25522
## 5 2.84 18.39773 18.62634 17.49825 400.1637 393.5943 9.059454 11.25328
## 6 2.84 21.69264 23.56485 20.10246 400.3486 387.4339 15.702841 19.29936
## RH_R RH_S Flow PARi PARo Press CsMch HsMch
## 1 40.58222 50.45947 499.6529 2000.318 473.8944 95.14421 -1.063225 0.1971472
## 2 40.57902 50.44809 499.6664 2000.272 374.8035 95.14422 -1.063225 0.1971472
## 3 40.58344 50.43618 499.6672 2000.276 369.1551 95.14424 -1.063225 0.1971472
## 4 40.58775 50.42611 499.6747 2000.251 333.8403 95.14480 -1.063225 0.1971472
## 5 40.58854 50.41741 499.6718 2000.256 248.9835 95.14486 -1.063225 0.1971472
## 6 57.62043 70.81761 499.4868 1999.142 631.7614 95.55821 -0.667627 -0.1099110
## CsMchSD HsMchSD CrMchSD HrMchSD StableF BLCslope BLCoffst
## 1 0.02566306 0.001353715 0.02074798 0.002030001 1.0000000 -0.2195652 2.737391
## 2 0.02566306 0.001353715 0.02074798 0.002030001 1.0000000 -0.2195652 2.737391
## 3 0.02566306 0.001353715 0.02074798 0.002030001 1.0000000 -0.2195652 2.737391
## 4 0.02566306 0.001353715 0.02074798 0.002030001 1.0000000 -0.2195652 2.737391
## 5 0.02566306 0.001353715 0.02074798 0.002030001 1.0000000 -0.2195652 2.737391
## 6 0.00000000 0.000000000 0.00000000 0.000000000 0.6666667 -0.2195652 2.737391
## f_parin f_parout alphaK Status fda Trans Tair_K Twall_K
## 1 1 0 0.16 111115 0.8327548 0.001856931 291.7878 291.5485
## 2 1 0 0.16 111115 0.8327773 0.001855420 291.7843 291.5484
## 3 1 0 0.16 111115 0.8327787 0.001852310 291.7818 291.5481
## 4 1 0 0.16 111115 0.8327912 0.001849580 291.7778 291.5477
## 5 1 0 0.16 111115 0.8327864 0.001847782 291.7763 291.5477
## 6 1 0 0.16 111115 0.8324780 0.003052946 296.7148 294.8426
## R.W.m2. Tl.Ta SVTleaf h2o_i h20diff CTair SVTair CndTotal
## 1 320.0509 2.699993 2.155823 22.65848 11.39518 18.51814 2.139739 0.1601937
## 2 320.0436 2.701089 2.155358 22.65359 11.39294 18.51633 2.139497 0.1600955
## 3 320.0442 2.702941 2.155025 22.65009 11.39232 18.51495 2.139311 0.1598363
## 4 320.0401 2.704744 2.154478 22.64420 11.38898 18.51275 2.139017 0.1596482
## 5 320.0409 2.705846 2.154283 22.64214 11.38886 18.51203 2.138921 0.1594950
## 6 319.8627 1.871631 2.917576 30.53192 11.23256 22.62874 2.756980 0.2650225
## vp_kPa VpdA CndCO2 Ci_Pa Ci.Ca RHsfc C2sfc AHs.Cs
## 1 1.071638 1.068101 0.1009396 32.35722 0.8640152 51.72697 391.3668 0.006240530
## 2 1.071386 1.068111 0.1008771 32.37116 0.8643486 51.72446 391.3920 0.006218764
## 3 1.071112 1.068199 0.1007125 32.34008 0.8635658 51.71510 391.3593 0.006249092
## 4 1.070876 1.068141 0.1005930 32.32211 0.8631116 51.71351 391.3394 0.006265825
## 5 1.070692 1.068229 0.1004957 32.31605 0.8629470 51.70697 391.3394 0.006267671
## 6 1.844213 0.912767 0.1678912 30.95837 0.8362046 65.79124 382.8855 0.016441415
summary(z)
## Date Obs Order ID
## Length:954 Min. : 1.0 Min. : 1.0 Length:954
## Class :character 1st Qu.: 44.0 1st Qu.:239.2 Class :character
## Mode :character Median :101.0 Median :477.5 Mode :character
## Mean :105.2 Mean :477.5
## 3rd Qu.:161.0 3rd Qu.:715.8
## Max. :251.0 Max. :954.0
## species treatment HHMMSS FTime
## Length:954 Min. :0.0000 Length:954 Min. : 32.5
## Class :character 1st Qu.:0.0000 Class :character 1st Qu.: 2661.2
## Mode :character Median :1.0000 Mode :character Median : 5740.8
## Mean :0.5839 Mean : 6146.8
## 3rd Qu.:1.0000 3rd Qu.: 9135.9
## Max. :1.0000 Max. :15848.0
## EBal. Photo Cond intr_WUE
## Min. :0 Min. : 0.6562 Min. :0.01017 Min. : 12.29
## 1st Qu.:0 1st Qu.: 6.2987 1st Qu.:0.11003 1st Qu.: 45.26
## Median :0 Median : 8.5409 Median :0.15859 Median : 55.29
## Mean :0 Mean : 8.8544 Mean :0.18501 Mean : 56.56
## 3rd Qu.:0 3rd Qu.:11.4125 3rd Qu.:0.23057 3rd Qu.: 66.34
## Max. :0 Max. :18.8356 Max. :0.60011 Max. :134.70
## Ci Trmmol VpdL CTleaf Area
## Min. :170.3 Min. :0.2255 Min. :0.7924 Min. :17.25 Min. :6
## 1st Qu.:260.2 1st Qu.:1.9528 1st Qu.:1.3125 1st Qu.:23.57 1st Qu.:6
## Median :279.3 Median :2.6025 Median :1.5733 Median :26.34 Median :6
## Mean :280.1 Mean :2.7570 Mean :1.6699 Mean :25.74 Mean :6
## 3rd Qu.:299.6 3rd Qu.:3.5422 3rd Qu.:2.0102 3rd Qu.:27.85 3rd Qu.:6
## Max. :362.6 Max. :7.5769 Max. :3.4890 Max. :35.05 Max. :6
## BLC_1 StmRat BLCond Tair
## Min. :1.42 Min. :0.5000 Min. :2.556 Min. :17.81
## 1st Qu.:1.42 1st Qu.:0.5000 1st Qu.:2.840 1st Qu.:21.66
## Median :1.42 Median :1.0000 Median :2.840 Median :25.10
## Mean :1.42 Mean :0.7573 Mean :2.771 Mean :24.36
## 3rd Qu.:1.42 3rd Qu.:1.0000 3rd Qu.:2.840 3rd Qu.:26.15
## Max. :1.42 Max. :1.0000 Max. :2.840 Max. :34.91
## Tleaf TBlk CO2R CO2S
## Min. :17.25 Min. :17.48 Min. :398.5 Min. :337.6
## 1st Qu.:23.57 1st Qu.:20.10 1st Qu.:399.9 1st Qu.:380.9
## Median :26.34 Median :23.71 Median :400.0 Median :387.4
## Mean :25.74 Mean :23.43 Mean :400.0 Mean :383.3
## 3rd Qu.:27.85 3rd Qu.:25.12 3rd Qu.:400.2 3rd Qu.:390.9
## Max. :35.05 Max. :34.97 Max. :401.7 Max. :399.1
## H2OR H2OS RH_R RH_S
## Min. : 5.776 Min. : 6.367 Min. :22.19 Min. :27.07
## 1st Qu.: 9.657 1st Qu.:12.288 1st Qu.:34.43 1st Qu.:46.68
## Median :14.790 Median :17.641 Median :41.08 Median :54.23
## Mean :13.494 Mean :17.968 Mean :41.45 Mean :54.49
## 3rd Qu.:16.917 3rd Qu.:22.055 3rd Qu.:47.76 3rd Qu.:63.58
## Max. :21.307 Max. :36.216 Max. :62.67 Max. :78.17
## Flow PARi PARo Press
## Min. :199.5 Min. :1884 Min. : 32.74 Min. :95.14
## 1st Qu.:499.5 1st Qu.:1999 1st Qu.: 370.70 1st Qu.:95.55
## Median :499.5 Median :2000 Median :1453.50 Median :95.70
## Mean :430.4 Mean :2000 Mean :1170.34 Mean :95.75
## 3rd Qu.:499.6 3rd Qu.:2001 3rd Qu.:1814.03 3rd Qu.:96.35
## Max. :499.8 Max. :2003 Max. :2304.63 Max. :96.54
## CsMch HsMch CsMchSD HsMchSD
## Min. :-1.0632 Min. :-0.22468 Min. :0.00000 Min. :0.000000
## 1st Qu.:-0.6676 1st Qu.:-0.10991 1st Qu.:0.00000 1st Qu.:0.000000
## Median :-0.6676 Median : 0.03608 Median :0.02566 Median :0.001354
## Mean :-0.1372 Mean : 0.04782 Mean :0.03284 Mean :0.004677
## 3rd Qu.: 0.7622 3rd Qu.: 0.11395 3rd Qu.:0.05226 3rd Qu.:0.008293
## Max. : 1.6566 Max. : 0.19715 Max. :0.07070 Max. :0.012293
## CrMchSD HrMchSD StableF BLCslope
## Min. :0.00000 Min. :0.000000 Min. :0.3333 Min. :-0.2196
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.6667 1st Qu.:-0.2196
## Median :0.02075 Median :0.002030 Median :1.0000 Median :-0.2196
## Mean :0.02724 Mean :0.004634 Mean :0.8326 Mean :-0.2196
## 3rd Qu.:0.02991 3rd Qu.:0.009385 3rd Qu.:1.0000 3rd Qu.:-0.2196
## Max. :0.09885 Max. :0.010118 Max. :1.0000 Max. :-0.2196
## BLCoffst f_parin f_parout alphaK Status
## Min. :2.737 Min. :1 Min. :0 Min. :0.16 Min. :111115
## 1st Qu.:2.737 1st Qu.:1 1st Qu.:0 1st Qu.:0.16 1st Qu.:111115
## Median :2.737 Median :1 Median :0 Median :0.16 Median :111115
## Mean :2.737 Mean :1 Mean :0 Mean :0.16 Mean :111115
## 3rd Qu.:2.737 3rd Qu.:1 3rd Qu.:0 3rd Qu.:0.16 3rd Qu.:111115
## Max. :2.737 Max. :1 Max. :0 Max. :0.16 Max. :111115
## fda Trans Tair_K Twall_K
## Min. :0.3325 Min. :0.0002255 Min. :290.4 Min. :291.0
## 1st Qu.:0.8325 1st Qu.:0.0019528 1st Qu.:296.7 1st Qu.:294.8
## Median :0.8326 Median :0.0026025 Median :299.5 Median :298.3
## Mean :0.7173 Mean :0.0027570 Mean :298.9 Mean :297.5
## 3rd Qu.:0.8327 3rd Qu.:0.0035422 3rd Qu.:301.0 3rd Qu.:299.3
## Max. :0.8330 Max. :0.0075769 Max. :308.2 Max. :308.1
## R.W.m2. Tl.Ta SVTleaf h2o_i
## Min. :301.5 Min. :-0.01955 Min. :1.976 Min. :20.75
## 1st Qu.:319.9 1st Qu.: 1.69136 1st Qu.:2.918 1st Qu.:30.54
## Median :320.0 Median : 2.14083 Median :3.442 Median :35.87
## Mean :320.0 Mean : 2.07646 Mean :3.393 Mean :35.40
## 3rd Qu.:320.1 3rd Qu.: 2.44755 3rd Qu.:3.761 3rd Qu.:39.32
## Max. :320.5 Max. : 3.49513 Max. :5.663 Max. :58.76
## h20diff CTair SVTair CndTotal
## Min. : 8.211 Min. :17.56 Min. :2.014 Min. :0.01013
## 1st Qu.:13.729 1st Qu.:22.63 1st Qu.:2.757 1st Qu.:0.10593
## Median :16.466 Median :25.61 Median :3.298 Median :0.15021
## Mean :17.436 Mean :25.05 Mean :3.265 Mean :0.16996
## 3rd Qu.:21.004 3rd Qu.:26.98 3rd Qu.:3.574 3rd Qu.:0.21325
## Max. :36.205 Max. :34.93 Max. :5.625 Max. :0.49543
## vp_kPa VpdA CndCO2 Ci_Pa
## Min. :0.6063 Min. :0.7173 Min. :0.006337 Min. :16.27
## 1st Qu.:1.1762 1st Qu.:1.1688 1st Qu.:0.066560 1st Qu.:24.92
## Median :1.6878 Median :1.4140 Median :0.094598 Median :26.71
## Mean :1.7228 Mean :1.5419 Mean :0.107462 Mean :26.82
## 3rd Qu.:2.1094 3rd Qu.:1.8666 3rd Qu.:0.134738 3rd Qu.:28.54
## Max. :3.4914 Max. :3.4515 Max. :0.317606 Max. :34.72
## Ci.Ca RHsfc C2sfc AHs.Cs
## Min. :0.4277 Min. :24.46 Min. :328.7 Min. :0.0004838
## 1st Qu.:0.6785 1st Qu.:44.21 1st Qu.:375.1 1st Qu.:0.0075363
## Median :0.7341 Median :50.25 Median :383.1 Median :0.0117282
## Mean :0.7312 Mean :51.77 Mean :379.0 Mean :0.0130881
## 3rd Qu.:0.7858 3rd Qu.:59.90 3rd Qu.:387.6 3rd Qu.:0.0164373
## Max. :0.9253 Max. :79.31 Max. :398.8 Max. :0.0444657
q <-z$Cond
summary(q)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01017 0.11003 0.15859 0.18501 0.23057 0.60011
p1 <- ggplot(data=z, aes(x=Cond, y=..density..)) + #..density.. creates a proportion
geom_histogram(color="grey60",fill="cornsilk",size=0.2)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
p1 <- p1 + geom_density(linetype="dotted",size=0.75)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#added emperical density vurve. This is an empirical curve that is fitted to the data. It does not assume any particular probability distribution, but it smooths out the shape of the histogram
#Next, fit a normal distribution to your data and grab the maximum likelihood estimators of the two parameters of the normal, the mean and the variance:
normPars <- fitdistr(z$Cond,"normal")
print(normPars)
## mean sd
## 0.185013609 0.110777172
## (0.003586543) (0.002536069)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 0.185 0.111
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 0.00359 0.00254
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 1.29e-05 0.00 0.00 6.43e-06
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 954
## $ loglik : num 745
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"] # note structure of getting a named attribute
## mean
## 0.1850136
#Now let’s call the dnorm function inside ggplot’s stat_function to generate the probability density for the normal distribution. Read about stat_function in the help system to see how you can use this to add a smooth function to any ggplot. Note that we first get the maximum likelihood parameters for a normal distribution fitted to thse data by calling fitdistr. Then we pass those parameters (meanML and sdML to stat_function)
#plot normal probabilyt density
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(z$Cond),len=length(z$Cond))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$Cond), args = list(mean = meanML, sd = sdML))
p1 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot exponential probablity density
expoPars <- fitdistr(z$Cond,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$Cond), args = list(rate=rateML))
p1 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#plot uniform probability density
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$Cond), args = list(min=min(z$Cond), max=max(z$Cond)))
p1 + stat + stat2 + stat3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# plot gamma probablity density
gammaPars <- fitdistr(z$Cond,"gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$Cond), args = list(shape=shapeML, rate=rateML))
p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot beta probability density
pSpecial <- ggplot(data=z, aes(x=Cond/(max(Cond + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=z$Cond/max(z$Cond + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$Comp), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
q <- rgamma(n = 500, shapeML, rateML)
q#use shapeML and rateML in rgamma
## [1] 0.32208479 0.16502619 0.17464312 0.14842475 0.29346791 0.20664408
## [7] 0.12813399 0.21940998 0.29513711 0.40204143 0.07573097 0.09449199
## [13] 0.16535670 0.37917874 0.25384960 0.20446661 0.10590658 0.15752687
## [19] 0.23188440 0.22151754 0.36276443 0.21250439 0.14530906 0.31399371
## [25] 0.13401606 0.31507650 0.45698980 0.20050574 0.09505817 0.03753028
## [31] 0.11960317 0.14464823 0.13092897 0.21348309 0.02560610 0.08728100
## [37] 0.16396005 0.14427719 0.19613877 0.10756797 0.28086103 0.18555377
## [43] 0.10980657 0.17416338 0.06908640 0.50312772 0.14862841 0.01649639
## [49] 0.28607640 0.20737945 0.02593873 0.27762208 0.36798280 0.11598623
## [55] 0.22367543 0.15279207 0.06467828 0.21210642 0.19194515 0.22820546
## [61] 0.39979422 0.21098277 0.16923410 0.07829016 0.20674679 0.20967638
## [67] 0.05898751 0.10990839 0.10912834 0.13033732 0.06208470 0.14747564
## [73] 0.19606804 0.06989757 0.22355197 0.09567042 0.04292593 0.14311150
## [79] 0.14347675 0.10320793 0.31821338 0.47227481 0.16159965 0.15987425
## [85] 0.05428556 0.10014716 0.05174292 0.06645900 0.05777614 0.08831330
## [91] 0.27697868 0.10746113 0.44089793 0.13909675 0.11668900 0.47237546
## [97] 0.08138555 0.17133970 0.11085973 0.19266358 0.27191676 0.08538251
## [103] 0.15091124 0.14013227 0.22865539 0.06634284 0.01984652 0.26180584
## [109] 0.10330560 0.01833275 0.36347950 0.49340486 0.19153197 0.41998218
## [115] 0.12942955 0.08619411 0.31517099 0.28181402 0.27071831 0.16429987
## [121] 0.13827723 0.05878797 0.14558775 0.04192771 0.14363726 0.18520797
## [127] 0.46092490 0.16069960 0.23190248 0.28899199 0.17773008 0.01593680
## [133] 0.27299472 0.24032252 0.10786479 0.11809891 0.10389904 0.19879350
## [139] 0.34454430 0.09394508 0.25433860 0.27643606 0.03133894 0.08604417
## [145] 0.06667884 0.16102953 0.21970963 0.39093048 0.20761289 0.40217730
## [151] 0.26845414 0.16928411 0.04001114 0.14882146 0.39745643 0.21898595
## [157] 0.06287860 0.08911313 0.11387633 0.18591362 0.24574645 0.37719593
## [163] 0.04885887 0.23794448 0.25561148 0.34846201 0.30608543 0.13947636
## [169] 0.28104549 0.41840388 0.31743773 0.05062520 0.36942851 0.18010859
## [175] 0.19588442 0.16574779 0.13560240 0.28067479 0.16262407 0.02374562
## [181] 0.20503520 0.27444841 0.24872110 0.07530378 0.52345757 0.21396158
## [187] 0.08995805 0.06561010 0.17075928 0.16200117 0.05132997 0.07157841
## [193] 0.15028871 0.14944351 0.52201864 0.14580799 0.16028939 0.22724558
## [199] 0.09557811 0.15729121 0.06967339 0.16108364 0.09008520 0.14273723
## [205] 0.10049610 0.06888423 0.38753634 0.12796369 0.10331389 0.18996086
## [211] 0.42027145 0.16963734 0.15119778 0.12996284 0.07541334 0.11059010
## [217] 0.16388222 0.09061178 0.02736396 0.14220284 0.27064894 0.22595467
## [223] 0.16922261 0.46462537 0.10529734 0.04268695 0.45412548 0.34553530
## [229] 0.08915614 0.16582475 0.28940417 0.30565730 0.17873830 0.30028250
## [235] 0.16404454 0.10039177 0.16901978 0.22888024 0.25064926 0.11852583
## [241] 0.62627685 0.31440989 0.19677493 0.26766765 0.21416145 0.11445957
## [247] 0.06763977 0.40073396 0.03829374 0.12253443 0.26095524 0.40054753
## [253] 0.03421687 0.37363799 0.09430895 0.31604726 0.24531533 0.03708348
## [259] 0.25814694 0.11648023 0.26451557 0.18501149 0.16053748 0.07724244
## [265] 0.08445782 0.08372048 0.22922735 0.18785953 0.16225066 0.07304349
## [271] 0.13435220 0.19599565 0.06203358 0.11241962 0.07061495 0.15488219
## [277] 0.10944902 0.10512018 0.19264989 0.12439285 0.25181138 0.28858682
## [283] 0.13478416 0.37420267 0.06843646 0.31541633 0.38201632 0.30648446
## [289] 0.09680178 0.16690059 0.08098710 0.07713143 0.36194223 0.26340957
## [295] 0.04646732 0.30385399 0.06248841 0.10337545 0.20871974 0.44557255
## [301] 0.09369726 0.15750522 0.15840900 0.09444663 0.04592926 0.19349788
## [307] 0.27119772 0.24799463 0.15429049 0.12606470 0.13119856 0.14344828
## [313] 0.10941300 0.16222728 0.36066534 0.15447797 0.25420624 0.07734047
## [319] 0.25810849 0.26730461 0.16429694 0.24066740 0.49454306 0.23104672
## [325] 0.23818038 0.06834192 0.16350401 0.07320763 0.22783674 0.08894629
## [331] 0.11758456 0.17935664 0.04226176 0.08991218 0.22551279 0.18471980
## [337] 0.55775611 0.14560192 0.17982620 0.19793526 0.09782954 0.19942675
## [343] 0.19075196 0.16735213 0.16008160 0.14077441 0.15599649 0.10184174
## [349] 0.21072223 0.10124454 0.11139214 0.13785833 0.17819500 0.64512662
## [355] 0.11160631 0.17124090 0.45557887 0.07675279 0.37183217 0.34997798
## [361] 0.07644887 0.18409518 0.13812905 0.17722535 0.11291731 0.17491198
## [367] 0.12621475 0.15124941 0.22822138 0.13617275 0.12290063 0.21689431
## [373] 0.15767379 0.17497917 0.07973624 0.08067618 0.09117057 0.23900610
## [379] 0.05340955 0.11811814 0.15555478 0.15565393 1.02133802 0.17831604
## [385] 0.27434840 0.29831782 0.17425190 0.34845733 0.08105646 0.07303920
## [391] 0.26173147 0.04220562 0.31376495 0.08380645 0.10798210 0.13363024
## [397] 0.21357284 0.06800050 0.43269557 0.14613258 0.04626655 0.10067550
## [403] 0.03521479 0.24813241 0.19864691 0.05913707 0.22334429 0.09396565
## [409] 0.22923659 0.08089313 0.06534297 0.10599853 0.21549171 0.13685784
## [415] 0.17031137 0.22749680 0.12644336 0.08160246 0.06584935 0.06733512
## [421] 0.08792721 0.07544396 0.16155821 0.05598919 0.33296598 0.19984945
## [427] 0.14282646 0.33365180 0.17374673 0.15197354 0.17244417 0.07949644
## [433] 0.22098679 0.18023520 0.20342663 0.31698821 0.20122702 0.74786024
## [439] 0.53201868 0.03488785 0.22032207 0.10857365 0.14947936 0.03852039
## [445] 0.02751338 0.16555482 0.26035707 0.31539414 0.12515969 0.21420176
## [451] 0.25636185 0.26715035 0.09141504 0.12075731 0.25563666 0.15279523
## [457] 0.39058998 0.44218160 0.06982432 0.44491884 0.07871120 0.58894054
## [463] 0.11361491 0.19109724 0.13988229 0.33356062 0.19887296 0.20625765
## [469] 0.08969601 0.09809442 0.06146988 0.25223910 0.10634202 0.10724863
## [475] 0.18963684 0.05752828 0.21476279 0.29144553 0.14617357 0.08962697
## [481] 0.37859953 0.12640335 0.11167051 0.17589003 0.15246035 0.10680351
## [487] 0.17553008 0.11223948 0.29774506 0.34777359 0.16027488 0.23779105
## [493] 0.12158375 0.61181596 0.22014685 0.19470956 0.16335686 0.10596677
## [499] 0.21075838 0.14051538
q <- data.frame(1:500, q)
head(q)
## X1.500 q
## 1 1 0.3220848
## 2 2 0.1650262
## 3 3 0.1746431
## 4 4 0.1484248
## 5 5 0.2934679
## 6 6 0.2066441
normPars <- fitdistr(q$q,"normal")
print(normPars)
## mean sd
## 0.189240132 0.122253370
## (0.005467337) (0.003865991)
str(normPars)
## List of 5
## $ estimate: Named num [1:2] 0.189 0.122
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ sd : Named num [1:2] 0.00547 0.00387
## ..- attr(*, "names")= chr [1:2] "mean" "sd"
## $ vcov : num [1:2, 1:2] 2.99e-05 0.00 0.00 1.49e-05
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:2] "mean" "sd"
## .. ..$ : chr [1:2] "mean" "sd"
## $ n : int 500
## $ loglik : num 341
## - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"]
## mean
## 0.1892401
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]
xval <- seq(0,max(q$q),len=length(q$q))
stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(q$q), args = list(mean = meanML, sd = sdML)) #added this to histogram
expoPars <- fitdistr(q$q,"exponential")
rateML <- expoPars$estimate["rate"]
stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(q$q), args = list(rate=rateML)) #added this to histogram
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(q$q), args = list(min=min(q$q), max=max(q$q)))
#added to histogram
# plot gamma probablity density
gammaPars <- fitdistr(q$q,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]
stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(q$q), args = list(shape=shapeML, rate=rateML))
ggplot(data=q, aes(x=q, y=..density..)) + #..density.. creates a proportion
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
geom_density(linetype="dotted",size=0.75) + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Plot beta probability density
pSpecial <- ggplot(data=q, aes(x=q/(max(q + 0.1)), y=..density..)) +
geom_histogram(color="grey60",fill="cornsilk",size=0.2) +
xlim(c(0,1)) +
geom_density(size=0.75,linetype="dotted")
betaPars <- fitdistr(x=q$q/max(q$q + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]
statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(q$q), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
How do the two histogram profiles compare? Do you think the model is doing a good job of simulating realistic data that match your original measurements? Why or why not?
the two histograms are fairly similar in terms of the overall smoothing curve; however, the simulated histogram had a slightly lower peak compared to the real histogram. Gamma curves were simlar
If you have entered a large data frame with many columns, try running all of the code on a different variable to see how the simulation performs.
Once we get a little bit more R coding under our belts, we will return to the problem of simulating data and use some of this code again.