Analyzing air quality

load libraries

library(ggplot2)
library(dplyr)
library(broom)

Read in data, on air quality metrics

airquality <- read.csv('airquality.csv')

Filter for only complete cases

aircomplete <- airquality %>% filter(complete.cases(airquality))

Create a plot comparing, Ozone levels with Temperature

p <- ggplot(airquality) + geom_point(aes(x=Temp, y=Ozone))
print(p)
## Warning: Removed 37 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-4

Let's change the colors of solar radiation, and save it in a new variable

p <- p + scale_color_gradient(low = "orange", high = "red")

Let's run a linear model ozone = a + bT + e

ozone_model <- lm(Ozone ~ Temp, data = aircomplete)
ozone_model %>% summary()
## 
## Call:
## lm(formula = Ozone ~ Temp, data = aircomplete)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -40.922 -17.459  -0.874  10.444 118.078 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -147.6461    18.7553  -7.872 2.76e-12 ***
## Temp           2.4391     0.2393  10.192  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.92 on 109 degrees of freedom
## Multiple R-squared:  0.488,  Adjusted R-squared:  0.4833 
## F-statistic: 103.9 on 1 and 109 DF,  p-value: < 2.2e-16

Are there any other parameters influencing ozone?

two_model <- lm(Ozone ~ Temp + Solar.R, data = aircomplete)
two_model %>% summary()
## 
## Call:
## lm(formula = Ozone ~ Temp + Solar.R, data = aircomplete)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.610 -15.976  -2.928  12.371 115.555 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -145.70316   18.44672  -7.899 2.53e-12 ***
## Temp           2.27847    0.24600   9.262 2.22e-15 ***
## Solar.R        0.05711    0.02572   2.221   0.0285 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23.5 on 108 degrees of freedom
## Multiple R-squared:  0.5103, Adjusted R-squared:  0.5012 
## F-statistic: 56.28 on 2 and 108 DF,  p-value: < 2.2e-16

Extract the coefficients

ozone_intercept <- ozone_model %>% tidy %>% filter(term == "(Intercept)") %>% select(estimate) %>% unlist()
ozone_slope <- ozone_model %>% tidy %>% filter(term == "Temp") %>% select(estimate) %>% unlist()

Plot the linear model

p2 <- p + geom_abline(intercept = ozone_intercept , slope = ozone_slope)
print(p2)
## Warning: Removed 37 rows containing missing values (geom_point).

plot of chunk unnamed-chunk-9

Boxplot

aircomplete <- aircomplete %>% mutate(Temp2 = ifelse(Temp < 80, "cool", "warm"))
ggplot(aircomplete) + geom_boxplot(aes(x=Temp2, y=Ozone))

plot of chunk unnamed-chunk-10