load libraries
library(ggplot2)
library(dplyr)
library(broom)
Read in data, on air quality metrics
airquality <- read.csv('airquality.csv')
Filter for only complete cases
aircomplete <- airquality %>% filter(complete.cases(airquality))
Create a plot comparing, Ozone levels with Temperature
p <- ggplot(airquality) + geom_point(aes(x=Temp, y=Ozone))
print(p)
## Warning: Removed 37 rows containing missing values (geom_point).
Let's change the colors of solar radiation, and save it in a new variable
p <- p + scale_color_gradient(low = "orange", high = "red")
Let's run a linear model ozone = a + bT + e
ozone_model <- lm(Ozone ~ Temp, data = aircomplete)
ozone_model %>% summary()
##
## Call:
## lm(formula = Ozone ~ Temp, data = aircomplete)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.922 -17.459 -0.874 10.444 118.078
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -147.6461 18.7553 -7.872 2.76e-12 ***
## Temp 2.4391 0.2393 10.192 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.92 on 109 degrees of freedom
## Multiple R-squared: 0.488, Adjusted R-squared: 0.4833
## F-statistic: 103.9 on 1 and 109 DF, p-value: < 2.2e-16
Are there any other parameters influencing ozone?
two_model <- lm(Ozone ~ Temp + Solar.R, data = aircomplete)
two_model %>% summary()
##
## Call:
## lm(formula = Ozone ~ Temp + Solar.R, data = aircomplete)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.610 -15.976 -2.928 12.371 115.555
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -145.70316 18.44672 -7.899 2.53e-12 ***
## Temp 2.27847 0.24600 9.262 2.22e-15 ***
## Solar.R 0.05711 0.02572 2.221 0.0285 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.5 on 108 degrees of freedom
## Multiple R-squared: 0.5103, Adjusted R-squared: 0.5012
## F-statistic: 56.28 on 2 and 108 DF, p-value: < 2.2e-16
Extract the coefficients
ozone_intercept <- ozone_model %>% tidy %>% filter(term == "(Intercept)") %>% select(estimate) %>% unlist()
ozone_slope <- ozone_model %>% tidy %>% filter(term == "Temp") %>% select(estimate) %>% unlist()
Plot the linear model
p2 <- p + geom_abline(intercept = ozone_intercept , slope = ozone_slope)
print(p2)
## Warning: Removed 37 rows containing missing values (geom_point).
aircomplete <- aircomplete %>% mutate(Temp2 = ifelse(Temp < 80, "cool", "warm"))
ggplot(aircomplete) + geom_boxplot(aes(x=Temp2, y=Ozone))