Assessing consistency of OLS amidst omitted variable bias

R Statistics

EDS 241: Environmental Policy Evaluation - Lab 1

Mia Forsline true
2022-01-06

Learning Goals

Load necessary packages

show

Demonstrate the consistency of OLS when the assumptions are upheld

show
set.seed(420) 

bigN <- 10000 

X1 <- runif(bigN, min = 0, max = 10)
u <- rnorm(bigN, mean = 0, sd = 4) 


Y <- 5 + 1.5*X1 + u 
population_data <- data.frame(X1, Y) 

model1 <- lm(formula = Y ~ X1, data = population_data)
huxreg(model1, error_pos="right")
     ────────────────────────────────────────────────────
                                    (1)                  
                      ───────────────────────────────────
       (Intercept)           5.065 ***          (0.079)  
       X1                    1.492 ***          (0.014)  
                      ───────────────────────────────────
       N                 10000                           
       R2                    0.547                       
       logLik           -27873.395                       
       AIC               55752.791                       
     ────────────────────────────────────────────────────
       *** p < 0.001; ** p < 0.01; * p < 0.05.           
Column names: names, model1, NA
show
betahat_output <- matrix(ncol = 2, nrow = bigN)

for (n in 1:bigN) {
  sample <- population_data[1:n,]
  betahat_output[n,] <- lm(Y ~ X1, data = sample)$coefficients
} 

n <- seq(1,bigN)
beta1hat <- betahat_output[,c(2)]
forgraph <- data.frame(n , betahat_output[,c(2)])

Graph the results

show
ggplot(forgraph , aes(x=n, y=beta1hat)) + geom_line(size=0.5, color="blue") +
  geom_hline(yintercept=1.5, size=2, color="red") +
  labs(x="n", y = "Beta1hat") + 
  ggthemes::theme_pander(base_size = 14) 
show
ggsave(filename = "logo.png", width = 5, height = 4, units = "in", dpi = 300)

Demonstrate the lack of OLS consistency due to omitted variables bias

show
X2 = X1 +rnorm(bigN , mean=0 , sd=2.2) 

Y <- 5 + 1.5*X1 + 10*X2 + u
population_data <- data.frame(X1, Y)

model1 <- lm(formula = Y ~ X1, data = population_data)
huxreg(model1, error_pos="right")
     ────────────────────────────────────────────────────
                                    (1)                  
                      ───────────────────────────────────
       (Intercept)           4.960 ***          (0.448)  
       X1                   11.596 ***          (0.077)  
                      ───────────────────────────────────
       N                 10000                           
       R2                    0.692                       
       logLik           -45267.984                       
       AIC               90541.968                       
     ────────────────────────────────────────────────────
       *** p < 0.001; ** p < 0.01; * p < 0.05.           
Column names: names, model1, NA
show
betahat_output <- matrix(ncol = 2, nrow = bigN)

for (n in 1:bigN) {
  sample <- population_data[1:n,]
  betahat_output[n,] <- lm(Y ~ X1, data = sample)$coefficients
} 

n <- seq(1,bigN)
beta1hat <- betahat_output[,c(2)]
forgraph <- data.frame(n , betahat_output[,c(2)])
show
cor(X1,X2)
[1] 0.7973109
show
sd(X1)
[1] 2.890737
show
sd(X2)
[1] 3.663339
show
1.5 + 10*cor(X1,X2)*sd(X2)/sd(X1)
[1] 11.60407

Graph the results

show
ggplot(forgraph , aes(x=n, y=beta1hat)) + geom_line(size=0.5, color="blue") +
  geom_hline(yintercept=1.5, size=2, color="red") +
  labs(x="n", y = "Beta1hat") + 
  ggthemes::theme_pander(base_size = 14)