library(ggplot2) # for graphics
library(MASS) # for maximum likelihood estimation


# quick and dirty, a truncated normal distribution to work on the solution set

z <- rnorm(n=3000,mean=0.2)
z <- data.frame(1:3000,z)
names(z) <- list("ID","myVar")
z <- z[z$myVar>0,]
str(z)
## 'data.frame':    1748 obs. of  2 variables:
##  $ ID   : int  1 4 5 6 12 13 14 15 17 18 ...
##  $ myVar: num  0.875 0.161 1.478 1.809 0.556 ...
summary(z$myVar)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.002728 0.352746 0.719894 0.861014 1.274544 4.520947
p1 <- ggplot(data=z, aes(x=myVar, y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) 
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

p1 <-  p1 +  geom_density(linetype="dotted",size=0.75)
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

normPars <- fitdistr(z$myVar,"normal")
print(normPars)
##       mean          sd    
##   0.86101383   0.63464353 
##  (0.01517956) (0.01073357)
str(normPars)
## List of 5
##  $ estimate: Named num [1:2] 0.861 0.635
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ sd      : Named num [1:2] 0.0152 0.0107
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ vcov    : num [1:2, 1:2] 0.00023 0 0 0.000115
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "mean" "sd"
##   .. ..$ : chr [1:2] "mean" "sd"
##  $ n       : int 1748
##  $ loglik  : num -1686
##  - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"] # note structure of getting a named attribute
##      mean 
## 0.8610138
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]

xval <- seq(0,max(z$myVar),len=length(z$myVar))

 stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$myVar), args = list(mean = meanML, sd = sdML))
 p1 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

expoPars <- fitdistr(z$myVar,"exponential")
rateML <- expoPars$estimate["rate"]

stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$myVar), args = list(rate=rateML))
 p1 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$myVar), args = list(min=min(z$myVar), max=max(z$myVar)))
 p1 + stat + stat2 + stat3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

gammaPars <- fitdistr(z$myVar,"gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]

stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$myVar), args = list(shape=shapeML, rate=rateML))
 p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 pSpecial <- ggplot(data=z, aes(x=myVar/(max(myVar + 0.1)), y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) + 
  xlim(c(0,1)) +
  geom_density(size=0.75,linetype="dotted")

betaPars <- fitdistr(x=z$myVar/max(z$myVar + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]

statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$myVar), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).

Adding in my own data

z <- read.csv("Freeman_Data.csv")
head(z)
##        Date Obs Order  ID species treatment   HHMMSS  FTime EBal.    Photo
## 1 6/17/2021  61    61 1_9    CADE         1 11:03:33 3543.5     0 4.721591
## 2 6/17/2021  62    62 1_9    CADE         1 11:03:33 3544.0     0 4.705655
## 3 6/17/2021  63    63 1_9    CADE         1 11:03:34 3544.5     0 4.729064
## 4 6/17/2021  64    64 1_9    CADE         1 11:03:34 3545.0     0 4.741632
## 5 6/17/2021  65    65 1_9    CADE         1 11:03:35 3545.5     0 4.743628
## 6  6/9/2021  71   301 1_9    CADE         1 11:36:26 5690.5     0 9.568417
##        Cond intr_WUE       Ci   Trmmol     VpdL   CTleaf Area BLC_1 StmRat
## 1 0.1697698 27.81172 340.0861 1.856931 1.084185 18.63775    6  1.42    0.5
## 2 0.1696595 27.73588 340.2325 1.855420 1.083972 18.63430    6  1.42    0.5
## 3 0.1693684 27.92176 339.9058 1.852310 1.083913 18.63184    6  1.42    0.5
## 4 0.1691572 28.03092 339.7150 1.849580 1.083602 18.62778    6  1.42    0.5
## 5 0.1689852 28.07126 339.6510 1.847782 1.083591 18.62634    6  1.42    0.5
## 6 0.2922992 32.73501 323.9740 3.052946 1.073363 23.56485    6  1.42    1.0
##   BLCond     Tair    Tleaf     TBlk     CO2R     CO2S      H2OR     H2OS
## 1   2.84 18.39852 18.63775 17.49838 400.1588 393.6112  9.058554 11.26330
## 2   2.84 18.39836 18.63430 17.49829 400.1564 393.6288  9.057750 11.26065
## 3   2.84 18.39805 18.63184 17.49820 400.1614 393.6073  9.058558 11.25777
## 4   2.84 18.39771 18.62778 17.49801 400.1612 393.5934  9.059276 11.25522
## 5   2.84 18.39773 18.62634 17.49825 400.1637 393.5943  9.059454 11.25328
## 6   2.84 21.69264 23.56485 20.10246 400.3486 387.4339 15.702841 19.29936
##       RH_R     RH_S     Flow     PARi     PARo    Press     CsMch      HsMch
## 1 40.58222 50.45947 499.6529 2000.318 473.8944 95.14421 -1.063225  0.1971472
## 2 40.57902 50.44809 499.6664 2000.272 374.8035 95.14422 -1.063225  0.1971472
## 3 40.58344 50.43618 499.6672 2000.276 369.1551 95.14424 -1.063225  0.1971472
## 4 40.58775 50.42611 499.6747 2000.251 333.8403 95.14480 -1.063225  0.1971472
## 5 40.58854 50.41741 499.6718 2000.256 248.9835 95.14486 -1.063225  0.1971472
## 6 57.62043 70.81761 499.4868 1999.142 631.7614 95.55821 -0.667627 -0.1099110
##      CsMchSD     HsMchSD    CrMchSD     HrMchSD   StableF   BLCslope BLCoffst
## 1 0.02566306 0.001353715 0.02074798 0.002030001 1.0000000 -0.2195652 2.737391
## 2 0.02566306 0.001353715 0.02074798 0.002030001 1.0000000 -0.2195652 2.737391
## 3 0.02566306 0.001353715 0.02074798 0.002030001 1.0000000 -0.2195652 2.737391
## 4 0.02566306 0.001353715 0.02074798 0.002030001 1.0000000 -0.2195652 2.737391
## 5 0.02566306 0.001353715 0.02074798 0.002030001 1.0000000 -0.2195652 2.737391
## 6 0.00000000 0.000000000 0.00000000 0.000000000 0.6666667 -0.2195652 2.737391
##   f_parin f_parout alphaK Status       fda       Trans   Tair_K  Twall_K
## 1       1        0   0.16 111115 0.8327548 0.001856931 291.7878 291.5485
## 2       1        0   0.16 111115 0.8327773 0.001855420 291.7843 291.5484
## 3       1        0   0.16 111115 0.8327787 0.001852310 291.7818 291.5481
## 4       1        0   0.16 111115 0.8327912 0.001849580 291.7778 291.5477
## 5       1        0   0.16 111115 0.8327864 0.001847782 291.7763 291.5477
## 6       1        0   0.16 111115 0.8324780 0.003052946 296.7148 294.8426
##    R.W.m2.    Tl.Ta  SVTleaf    h2o_i  h20diff    CTair   SVTair  CndTotal
## 1 320.0509 2.699993 2.155823 22.65848 11.39518 18.51814 2.139739 0.1601937
## 2 320.0436 2.701089 2.155358 22.65359 11.39294 18.51633 2.139497 0.1600955
## 3 320.0442 2.702941 2.155025 22.65009 11.39232 18.51495 2.139311 0.1598363
## 4 320.0401 2.704744 2.154478 22.64420 11.38898 18.51275 2.139017 0.1596482
## 5 320.0409 2.705846 2.154283 22.64214 11.38886 18.51203 2.138921 0.1594950
## 6 319.8627 1.871631 2.917576 30.53192 11.23256 22.62874 2.756980 0.2650225
##     vp_kPa     VpdA    CndCO2    Ci_Pa     Ci.Ca    RHsfc    C2sfc      AHs.Cs
## 1 1.071638 1.068101 0.1009396 32.35722 0.8640152 51.72697 391.3668 0.006240530
## 2 1.071386 1.068111 0.1008771 32.37116 0.8643486 51.72446 391.3920 0.006218764
## 3 1.071112 1.068199 0.1007125 32.34008 0.8635658 51.71510 391.3593 0.006249092
## 4 1.070876 1.068141 0.1005930 32.32211 0.8631116 51.71351 391.3394 0.006265825
## 5 1.070692 1.068229 0.1004957 32.31605 0.8629470 51.70697 391.3394 0.006267671
## 6 1.844213 0.912767 0.1678912 30.95837 0.8362046 65.79124 382.8855 0.016441415
summary(z)
##      Date                Obs            Order            ID           
##  Length:954         Min.   :  1.0   Min.   :  1.0   Length:954        
##  Class :character   1st Qu.: 44.0   1st Qu.:239.2   Class :character  
##  Mode  :character   Median :101.0   Median :477.5   Mode  :character  
##                     Mean   :105.2   Mean   :477.5                     
##                     3rd Qu.:161.0   3rd Qu.:715.8                     
##                     Max.   :251.0   Max.   :954.0                     
##    species            treatment         HHMMSS              FTime        
##  Length:954         Min.   :0.0000   Length:954         Min.   :   32.5  
##  Class :character   1st Qu.:0.0000   Class :character   1st Qu.: 2661.2  
##  Mode  :character   Median :1.0000   Mode  :character   Median : 5740.8  
##                     Mean   :0.5839                      Mean   : 6146.8  
##                     3rd Qu.:1.0000                      3rd Qu.: 9135.9  
##                     Max.   :1.0000                      Max.   :15848.0  
##      EBal.       Photo              Cond            intr_WUE     
##  Min.   :0   Min.   : 0.6562   Min.   :0.01017   Min.   : 12.29  
##  1st Qu.:0   1st Qu.: 6.2987   1st Qu.:0.11003   1st Qu.: 45.26  
##  Median :0   Median : 8.5409   Median :0.15859   Median : 55.29  
##  Mean   :0   Mean   : 8.8544   Mean   :0.18501   Mean   : 56.56  
##  3rd Qu.:0   3rd Qu.:11.4125   3rd Qu.:0.23057   3rd Qu.: 66.34  
##  Max.   :0   Max.   :18.8356   Max.   :0.60011   Max.   :134.70  
##        Ci            Trmmol            VpdL            CTleaf           Area  
##  Min.   :170.3   Min.   :0.2255   Min.   :0.7924   Min.   :17.25   Min.   :6  
##  1st Qu.:260.2   1st Qu.:1.9528   1st Qu.:1.3125   1st Qu.:23.57   1st Qu.:6  
##  Median :279.3   Median :2.6025   Median :1.5733   Median :26.34   Median :6  
##  Mean   :280.1   Mean   :2.7570   Mean   :1.6699   Mean   :25.74   Mean   :6  
##  3rd Qu.:299.6   3rd Qu.:3.5422   3rd Qu.:2.0102   3rd Qu.:27.85   3rd Qu.:6  
##  Max.   :362.6   Max.   :7.5769   Max.   :3.4890   Max.   :35.05   Max.   :6  
##      BLC_1          StmRat           BLCond           Tair      
##  Min.   :1.42   Min.   :0.5000   Min.   :2.556   Min.   :17.81  
##  1st Qu.:1.42   1st Qu.:0.5000   1st Qu.:2.840   1st Qu.:21.66  
##  Median :1.42   Median :1.0000   Median :2.840   Median :25.10  
##  Mean   :1.42   Mean   :0.7573   Mean   :2.771   Mean   :24.36  
##  3rd Qu.:1.42   3rd Qu.:1.0000   3rd Qu.:2.840   3rd Qu.:26.15  
##  Max.   :1.42   Max.   :1.0000   Max.   :2.840   Max.   :34.91  
##      Tleaf            TBlk            CO2R            CO2S      
##  Min.   :17.25   Min.   :17.48   Min.   :398.5   Min.   :337.6  
##  1st Qu.:23.57   1st Qu.:20.10   1st Qu.:399.9   1st Qu.:380.9  
##  Median :26.34   Median :23.71   Median :400.0   Median :387.4  
##  Mean   :25.74   Mean   :23.43   Mean   :400.0   Mean   :383.3  
##  3rd Qu.:27.85   3rd Qu.:25.12   3rd Qu.:400.2   3rd Qu.:390.9  
##  Max.   :35.05   Max.   :34.97   Max.   :401.7   Max.   :399.1  
##       H2OR             H2OS             RH_R            RH_S      
##  Min.   : 5.776   Min.   : 6.367   Min.   :22.19   Min.   :27.07  
##  1st Qu.: 9.657   1st Qu.:12.288   1st Qu.:34.43   1st Qu.:46.68  
##  Median :14.790   Median :17.641   Median :41.08   Median :54.23  
##  Mean   :13.494   Mean   :17.968   Mean   :41.45   Mean   :54.49  
##  3rd Qu.:16.917   3rd Qu.:22.055   3rd Qu.:47.76   3rd Qu.:63.58  
##  Max.   :21.307   Max.   :36.216   Max.   :62.67   Max.   :78.17  
##       Flow            PARi           PARo             Press      
##  Min.   :199.5   Min.   :1884   Min.   :  32.74   Min.   :95.14  
##  1st Qu.:499.5   1st Qu.:1999   1st Qu.: 370.70   1st Qu.:95.55  
##  Median :499.5   Median :2000   Median :1453.50   Median :95.70  
##  Mean   :430.4   Mean   :2000   Mean   :1170.34   Mean   :95.75  
##  3rd Qu.:499.6   3rd Qu.:2001   3rd Qu.:1814.03   3rd Qu.:96.35  
##  Max.   :499.8   Max.   :2003   Max.   :2304.63   Max.   :96.54  
##      CsMch             HsMch             CsMchSD           HsMchSD        
##  Min.   :-1.0632   Min.   :-0.22468   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:-0.6676   1st Qu.:-0.10991   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :-0.6676   Median : 0.03608   Median :0.02566   Median :0.001354  
##  Mean   :-0.1372   Mean   : 0.04782   Mean   :0.03284   Mean   :0.004677  
##  3rd Qu.: 0.7622   3rd Qu.: 0.11395   3rd Qu.:0.05226   3rd Qu.:0.008293  
##  Max.   : 1.6566   Max.   : 0.19715   Max.   :0.07070   Max.   :0.012293  
##     CrMchSD           HrMchSD            StableF          BLCslope      
##  Min.   :0.00000   Min.   :0.000000   Min.   :0.3333   Min.   :-0.2196  
##  1st Qu.:0.00000   1st Qu.:0.000000   1st Qu.:0.6667   1st Qu.:-0.2196  
##  Median :0.02075   Median :0.002030   Median :1.0000   Median :-0.2196  
##  Mean   :0.02724   Mean   :0.004634   Mean   :0.8326   Mean   :-0.2196  
##  3rd Qu.:0.02991   3rd Qu.:0.009385   3rd Qu.:1.0000   3rd Qu.:-0.2196  
##  Max.   :0.09885   Max.   :0.010118   Max.   :1.0000   Max.   :-0.2196  
##     BLCoffst        f_parin     f_parout     alphaK         Status      
##  Min.   :2.737   Min.   :1   Min.   :0   Min.   :0.16   Min.   :111115  
##  1st Qu.:2.737   1st Qu.:1   1st Qu.:0   1st Qu.:0.16   1st Qu.:111115  
##  Median :2.737   Median :1   Median :0   Median :0.16   Median :111115  
##  Mean   :2.737   Mean   :1   Mean   :0   Mean   :0.16   Mean   :111115  
##  3rd Qu.:2.737   3rd Qu.:1   3rd Qu.:0   3rd Qu.:0.16   3rd Qu.:111115  
##  Max.   :2.737   Max.   :1   Max.   :0   Max.   :0.16   Max.   :111115  
##       fda             Trans               Tair_K         Twall_K     
##  Min.   :0.3325   Min.   :0.0002255   Min.   :290.4   Min.   :291.0  
##  1st Qu.:0.8325   1st Qu.:0.0019528   1st Qu.:296.7   1st Qu.:294.8  
##  Median :0.8326   Median :0.0026025   Median :299.5   Median :298.3  
##  Mean   :0.7173   Mean   :0.0027570   Mean   :298.9   Mean   :297.5  
##  3rd Qu.:0.8327   3rd Qu.:0.0035422   3rd Qu.:301.0   3rd Qu.:299.3  
##  Max.   :0.8330   Max.   :0.0075769   Max.   :308.2   Max.   :308.1  
##     R.W.m2.          Tl.Ta             SVTleaf          h2o_i      
##  Min.   :301.5   Min.   :-0.01955   Min.   :1.976   Min.   :20.75  
##  1st Qu.:319.9   1st Qu.: 1.69136   1st Qu.:2.918   1st Qu.:30.54  
##  Median :320.0   Median : 2.14083   Median :3.442   Median :35.87  
##  Mean   :320.0   Mean   : 2.07646   Mean   :3.393   Mean   :35.40  
##  3rd Qu.:320.1   3rd Qu.: 2.44755   3rd Qu.:3.761   3rd Qu.:39.32  
##  Max.   :320.5   Max.   : 3.49513   Max.   :5.663   Max.   :58.76  
##     h20diff           CTair           SVTair         CndTotal      
##  Min.   : 8.211   Min.   :17.56   Min.   :2.014   Min.   :0.01013  
##  1st Qu.:13.729   1st Qu.:22.63   1st Qu.:2.757   1st Qu.:0.10593  
##  Median :16.466   Median :25.61   Median :3.298   Median :0.15021  
##  Mean   :17.436   Mean   :25.05   Mean   :3.265   Mean   :0.16996  
##  3rd Qu.:21.004   3rd Qu.:26.98   3rd Qu.:3.574   3rd Qu.:0.21325  
##  Max.   :36.205   Max.   :34.93   Max.   :5.625   Max.   :0.49543  
##      vp_kPa            VpdA            CndCO2             Ci_Pa      
##  Min.   :0.6063   Min.   :0.7173   Min.   :0.006337   Min.   :16.27  
##  1st Qu.:1.1762   1st Qu.:1.1688   1st Qu.:0.066560   1st Qu.:24.92  
##  Median :1.6878   Median :1.4140   Median :0.094598   Median :26.71  
##  Mean   :1.7228   Mean   :1.5419   Mean   :0.107462   Mean   :26.82  
##  3rd Qu.:2.1094   3rd Qu.:1.8666   3rd Qu.:0.134738   3rd Qu.:28.54  
##  Max.   :3.4914   Max.   :3.4515   Max.   :0.317606   Max.   :34.72  
##      Ci.Ca            RHsfc           C2sfc           AHs.Cs         
##  Min.   :0.4277   Min.   :24.46   Min.   :328.7   Min.   :0.0004838  
##  1st Qu.:0.6785   1st Qu.:44.21   1st Qu.:375.1   1st Qu.:0.0075363  
##  Median :0.7341   Median :50.25   Median :383.1   Median :0.0117282  
##  Mean   :0.7312   Mean   :51.77   Mean   :379.0   Mean   :0.0130881  
##  3rd Qu.:0.7858   3rd Qu.:59.90   3rd Qu.:387.6   3rd Qu.:0.0164373  
##  Max.   :0.9253   Max.   :79.31   Max.   :398.8   Max.   :0.0444657
q <-z$Cond
summary(q)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01017 0.11003 0.15859 0.18501 0.23057 0.60011
p1 <- ggplot(data=z, aes(x=Cond, y=..density..)) + #..density.. creates a proportion 
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) 

print(p1) 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

p1 <-  p1 +  geom_density(linetype="dotted",size=0.75) 
print(p1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#added emperical density vurve. This is an empirical curve that is fitted to the data. It does not assume any particular probability distribution, but it smooths out the shape of the histogram


#Next, fit a normal distribution to your data and grab the maximum likelihood estimators of the two parameters of the normal, the mean and the variance:
normPars <- fitdistr(z$Cond,"normal")
print(normPars)
##       mean           sd     
##   0.185013609   0.110777172 
##  (0.003586543) (0.002536069)
str(normPars)
## List of 5
##  $ estimate: Named num [1:2] 0.185 0.111
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ sd      : Named num [1:2] 0.00359 0.00254
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ vcov    : num [1:2, 1:2] 1.29e-05 0.00 0.00 6.43e-06
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "mean" "sd"
##   .. ..$ : chr [1:2] "mean" "sd"
##  $ n       : int 954
##  $ loglik  : num 745
##  - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"] # note structure of getting a named attribute
##      mean 
## 0.1850136
#Now let’s call the dnorm function inside ggplot’s stat_function to generate the probability density for the normal distribution. Read about stat_function in the help system to see how you can use this to add a smooth function to any ggplot. Note that we first get the maximum likelihood parameters for a normal distribution fitted to thse data by calling fitdistr. Then we pass those parameters (meanML and sdML to stat_function)

#plot normal probabilyt density 
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]

xval <- seq(0,max(z$Cond),len=length(z$Cond))

 stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(z$Cond), args = list(mean = meanML, sd = sdML))
 p1 + stat
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Plot exponential probablity density  
expoPars <- fitdistr(z$Cond,"exponential")
rateML <- expoPars$estimate["rate"]

stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(z$Cond), args = list(rate=rateML))
 p1 + stat + stat2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#plot uniform probability density  
 stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(z$Cond), args = list(min=min(z$Cond), max=max(z$Cond)))
 p1 + stat + stat2 + stat3
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# plot gamma probablity density 
gammaPars <- fitdistr(z$Cond,"gamma")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]

stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(z$Cond), args = list(shape=shapeML, rate=rateML))
 p1 + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Plot beta probability density 
 pSpecial <- ggplot(data=z, aes(x=Cond/(max(Cond + 0.1)), y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) + 
  xlim(c(0,1)) +
  geom_density(size=0.75,linetype="dotted")

betaPars <- fitdistr(x=z$Cond/max(z$Cond + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]

statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(z$Comp), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).

q <- rgamma(n = 500, shapeML, rateML)
q#use shapeML and rateML in rgamma 
##   [1] 0.32208479 0.16502619 0.17464312 0.14842475 0.29346791 0.20664408
##   [7] 0.12813399 0.21940998 0.29513711 0.40204143 0.07573097 0.09449199
##  [13] 0.16535670 0.37917874 0.25384960 0.20446661 0.10590658 0.15752687
##  [19] 0.23188440 0.22151754 0.36276443 0.21250439 0.14530906 0.31399371
##  [25] 0.13401606 0.31507650 0.45698980 0.20050574 0.09505817 0.03753028
##  [31] 0.11960317 0.14464823 0.13092897 0.21348309 0.02560610 0.08728100
##  [37] 0.16396005 0.14427719 0.19613877 0.10756797 0.28086103 0.18555377
##  [43] 0.10980657 0.17416338 0.06908640 0.50312772 0.14862841 0.01649639
##  [49] 0.28607640 0.20737945 0.02593873 0.27762208 0.36798280 0.11598623
##  [55] 0.22367543 0.15279207 0.06467828 0.21210642 0.19194515 0.22820546
##  [61] 0.39979422 0.21098277 0.16923410 0.07829016 0.20674679 0.20967638
##  [67] 0.05898751 0.10990839 0.10912834 0.13033732 0.06208470 0.14747564
##  [73] 0.19606804 0.06989757 0.22355197 0.09567042 0.04292593 0.14311150
##  [79] 0.14347675 0.10320793 0.31821338 0.47227481 0.16159965 0.15987425
##  [85] 0.05428556 0.10014716 0.05174292 0.06645900 0.05777614 0.08831330
##  [91] 0.27697868 0.10746113 0.44089793 0.13909675 0.11668900 0.47237546
##  [97] 0.08138555 0.17133970 0.11085973 0.19266358 0.27191676 0.08538251
## [103] 0.15091124 0.14013227 0.22865539 0.06634284 0.01984652 0.26180584
## [109] 0.10330560 0.01833275 0.36347950 0.49340486 0.19153197 0.41998218
## [115] 0.12942955 0.08619411 0.31517099 0.28181402 0.27071831 0.16429987
## [121] 0.13827723 0.05878797 0.14558775 0.04192771 0.14363726 0.18520797
## [127] 0.46092490 0.16069960 0.23190248 0.28899199 0.17773008 0.01593680
## [133] 0.27299472 0.24032252 0.10786479 0.11809891 0.10389904 0.19879350
## [139] 0.34454430 0.09394508 0.25433860 0.27643606 0.03133894 0.08604417
## [145] 0.06667884 0.16102953 0.21970963 0.39093048 0.20761289 0.40217730
## [151] 0.26845414 0.16928411 0.04001114 0.14882146 0.39745643 0.21898595
## [157] 0.06287860 0.08911313 0.11387633 0.18591362 0.24574645 0.37719593
## [163] 0.04885887 0.23794448 0.25561148 0.34846201 0.30608543 0.13947636
## [169] 0.28104549 0.41840388 0.31743773 0.05062520 0.36942851 0.18010859
## [175] 0.19588442 0.16574779 0.13560240 0.28067479 0.16262407 0.02374562
## [181] 0.20503520 0.27444841 0.24872110 0.07530378 0.52345757 0.21396158
## [187] 0.08995805 0.06561010 0.17075928 0.16200117 0.05132997 0.07157841
## [193] 0.15028871 0.14944351 0.52201864 0.14580799 0.16028939 0.22724558
## [199] 0.09557811 0.15729121 0.06967339 0.16108364 0.09008520 0.14273723
## [205] 0.10049610 0.06888423 0.38753634 0.12796369 0.10331389 0.18996086
## [211] 0.42027145 0.16963734 0.15119778 0.12996284 0.07541334 0.11059010
## [217] 0.16388222 0.09061178 0.02736396 0.14220284 0.27064894 0.22595467
## [223] 0.16922261 0.46462537 0.10529734 0.04268695 0.45412548 0.34553530
## [229] 0.08915614 0.16582475 0.28940417 0.30565730 0.17873830 0.30028250
## [235] 0.16404454 0.10039177 0.16901978 0.22888024 0.25064926 0.11852583
## [241] 0.62627685 0.31440989 0.19677493 0.26766765 0.21416145 0.11445957
## [247] 0.06763977 0.40073396 0.03829374 0.12253443 0.26095524 0.40054753
## [253] 0.03421687 0.37363799 0.09430895 0.31604726 0.24531533 0.03708348
## [259] 0.25814694 0.11648023 0.26451557 0.18501149 0.16053748 0.07724244
## [265] 0.08445782 0.08372048 0.22922735 0.18785953 0.16225066 0.07304349
## [271] 0.13435220 0.19599565 0.06203358 0.11241962 0.07061495 0.15488219
## [277] 0.10944902 0.10512018 0.19264989 0.12439285 0.25181138 0.28858682
## [283] 0.13478416 0.37420267 0.06843646 0.31541633 0.38201632 0.30648446
## [289] 0.09680178 0.16690059 0.08098710 0.07713143 0.36194223 0.26340957
## [295] 0.04646732 0.30385399 0.06248841 0.10337545 0.20871974 0.44557255
## [301] 0.09369726 0.15750522 0.15840900 0.09444663 0.04592926 0.19349788
## [307] 0.27119772 0.24799463 0.15429049 0.12606470 0.13119856 0.14344828
## [313] 0.10941300 0.16222728 0.36066534 0.15447797 0.25420624 0.07734047
## [319] 0.25810849 0.26730461 0.16429694 0.24066740 0.49454306 0.23104672
## [325] 0.23818038 0.06834192 0.16350401 0.07320763 0.22783674 0.08894629
## [331] 0.11758456 0.17935664 0.04226176 0.08991218 0.22551279 0.18471980
## [337] 0.55775611 0.14560192 0.17982620 0.19793526 0.09782954 0.19942675
## [343] 0.19075196 0.16735213 0.16008160 0.14077441 0.15599649 0.10184174
## [349] 0.21072223 0.10124454 0.11139214 0.13785833 0.17819500 0.64512662
## [355] 0.11160631 0.17124090 0.45557887 0.07675279 0.37183217 0.34997798
## [361] 0.07644887 0.18409518 0.13812905 0.17722535 0.11291731 0.17491198
## [367] 0.12621475 0.15124941 0.22822138 0.13617275 0.12290063 0.21689431
## [373] 0.15767379 0.17497917 0.07973624 0.08067618 0.09117057 0.23900610
## [379] 0.05340955 0.11811814 0.15555478 0.15565393 1.02133802 0.17831604
## [385] 0.27434840 0.29831782 0.17425190 0.34845733 0.08105646 0.07303920
## [391] 0.26173147 0.04220562 0.31376495 0.08380645 0.10798210 0.13363024
## [397] 0.21357284 0.06800050 0.43269557 0.14613258 0.04626655 0.10067550
## [403] 0.03521479 0.24813241 0.19864691 0.05913707 0.22334429 0.09396565
## [409] 0.22923659 0.08089313 0.06534297 0.10599853 0.21549171 0.13685784
## [415] 0.17031137 0.22749680 0.12644336 0.08160246 0.06584935 0.06733512
## [421] 0.08792721 0.07544396 0.16155821 0.05598919 0.33296598 0.19984945
## [427] 0.14282646 0.33365180 0.17374673 0.15197354 0.17244417 0.07949644
## [433] 0.22098679 0.18023520 0.20342663 0.31698821 0.20122702 0.74786024
## [439] 0.53201868 0.03488785 0.22032207 0.10857365 0.14947936 0.03852039
## [445] 0.02751338 0.16555482 0.26035707 0.31539414 0.12515969 0.21420176
## [451] 0.25636185 0.26715035 0.09141504 0.12075731 0.25563666 0.15279523
## [457] 0.39058998 0.44218160 0.06982432 0.44491884 0.07871120 0.58894054
## [463] 0.11361491 0.19109724 0.13988229 0.33356062 0.19887296 0.20625765
## [469] 0.08969601 0.09809442 0.06146988 0.25223910 0.10634202 0.10724863
## [475] 0.18963684 0.05752828 0.21476279 0.29144553 0.14617357 0.08962697
## [481] 0.37859953 0.12640335 0.11167051 0.17589003 0.15246035 0.10680351
## [487] 0.17553008 0.11223948 0.29774506 0.34777359 0.16027488 0.23779105
## [493] 0.12158375 0.61181596 0.22014685 0.19470956 0.16335686 0.10596677
## [499] 0.21075838 0.14051538
q <- data.frame(1:500, q)
head(q)
##   X1.500         q
## 1      1 0.3220848
## 2      2 0.1650262
## 3      3 0.1746431
## 4      4 0.1484248
## 5      5 0.2934679
## 6      6 0.2066441
normPars <- fitdistr(q$q,"normal")
print(normPars)
##       mean           sd     
##   0.189240132   0.122253370 
##  (0.005467337) (0.003865991)
str(normPars)
## List of 5
##  $ estimate: Named num [1:2] 0.189 0.122
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ sd      : Named num [1:2] 0.00547 0.00387
##   ..- attr(*, "names")= chr [1:2] "mean" "sd"
##  $ vcov    : num [1:2, 1:2] 2.99e-05 0.00 0.00 1.49e-05
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : chr [1:2] "mean" "sd"
##   .. ..$ : chr [1:2] "mean" "sd"
##  $ n       : int 500
##  $ loglik  : num 341
##  - attr(*, "class")= chr "fitdistr"
normPars$estimate["mean"]
##      mean 
## 0.1892401
meanML <- normPars$estimate["mean"]
sdML <- normPars$estimate["sd"]

xval <- seq(0,max(q$q),len=length(q$q))

 stat <- stat_function(aes(x = xval, y = ..y..), fun = dnorm, colour="red", n = length(q$q), args = list(mean = meanML, sd = sdML)) #added this to histogram

expoPars <- fitdistr(q$q,"exponential")
rateML <- expoPars$estimate["rate"]

stat2 <- stat_function(aes(x = xval, y = ..y..), fun = dexp, colour="blue", n = length(q$q), args = list(rate=rateML)) #added this to histogram
 
stat3 <- stat_function(aes(x = xval, y = ..y..), fun = dunif, colour="darkgreen", n = length(q$q), args = list(min=min(q$q), max=max(q$q)))
 #added to histogram

# plot gamma probablity density 
gammaPars <- fitdistr(q$q,"gamma")
shapeML <- gammaPars$estimate["shape"]
rateML <- gammaPars$estimate["rate"]

stat4 <- stat_function(aes(x = xval, y = ..y..), fun = dgamma, colour="brown", n = length(q$q), args = list(shape=shapeML, rate=rateML))

 
ggplot(data=q, aes(x=q, y=..density..)) + #..density.. creates a proportion 
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) +  
  geom_density(linetype="dotted",size=0.75) + stat + stat2 + stat3 + stat4
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Plot beta probability density 
 pSpecial <- ggplot(data=q, aes(x=q/(max(q + 0.1)), y=..density..)) +
  geom_histogram(color="grey60",fill="cornsilk",size=0.2) + 
  xlim(c(0,1)) +
  geom_density(size=0.75,linetype="dotted")

betaPars <- fitdistr(x=q$q/max(q$q + 0.1),start=list(shape1=1,shape2=2),"beta")
## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced

## Warning in densfun(x, parm[1], parm[2], ...): NaNs produced
shape1ML <- betaPars$estimate["shape1"]
shape2ML <- betaPars$estimate["shape2"]

statSpecial <- stat_function(aes(x = xval, y = ..y..), fun = dbeta, colour="orchid", n = length(q$q), args = list(shape1=shape1ML,shape2=shape2ML))
pSpecial + statSpecial
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).

How do the two histogram profiles compare? Do you think the model is doing a good job of simulating realistic data that match your original measurements? Why or why not?

the two histograms are fairly similar in terms of the overall smoothing curve; however, the simulated histogram had a slightly lower peak compared to the real histogram. Gamma curves were simlar

If you have entered a large data frame with many columns, try running all of the code on a different variable to see how the simulation performs.

Once we get a little bit more R coding under our belts, we will return to the problem of simulating data and use some of this code again.