# Examples of dummy dependent variables: LPM, probit, and logistic regression
# models.
#
# Estimates the probability of labor participation using Current Population
# Survey (CPS) data.
#   Data: cps_msa2013 -- 2013 Current Population Survey for 3 MSAs
#   3 MSAs: NY, LA, CHICAGO
#
# Created by Jin Man Lee
# Updated: 11/14/2022

# Clear all memory and working space.
# NOTE(review): this wipes every object in the global environment of the
# calling session; restarting R is a safer way to get a clean workspace.
rm(list = ls())

# Load all required libraries. If one is not installed, install it first,
# e.g. install.packages("foreign").
library(foreign)
library(dplyr)
library(mfx)
library(stargazer)

# Data ----

# Use the following line if you use R on the bigblue server:
# cps <- read.dta("/var/www/html/jlee141/econdata/cps_data/cps_msa2013.dta")
cps <- read.dta(
  "https://bigblue.depaul.edu/jlee141/econdata/cps_data/cps_msa2013.dta"
)

# Create the analysis variables and remove any invalid cases.
cps1 <- cps %>%
  mutate(
    # Numeric code of the education category.
    educ92a = as.numeric(educ92),
    # Map each education code to a number of years of schooling; codes that
    # match no rule become NA and are dropped by complete.cases() below.
    educyr = case_when(
      educ92a %in% 1 ~ 1,
      educ92a %in% 2 ~ 4,
      educ92a %in% 3 ~ 6,
      educ92a %in% 4 ~ 8,
      educ92a %in% 5 ~ 9,
      educ92a %in% 6 ~ 10,
      educ92a %in% 7 ~ 11,
      educ92a %in% 8:9 ~ 12,
      educ92a %in% 10:12 ~ 14,
      educ92a %in% 13 ~ 16,
      educ92a %in% 14 ~ 18,
      educ92a %in% 15 ~ 20,
      educ92a %in% 16 ~ 21
    ),
    edusq = educyr^2,
    # Labor participation dummy: 1 when the numeric lfstat code is 1 or 2,
    # NA when lfstat is missing, 0 otherwise.
    # NOTE(review): presumably codes 1 and 2 are the "in the labor force"
    # categories -- confirm against the table(lfstat, laborp) output below.
    laborp = case_when(
      is.na(lfstat) ~ NA_real_,
      as.numeric(lfstat) %in% 1:2 ~ 1,
      TRUE ~ 0
    )
  ) %>%
  # log() needs strictly positive income; this also drops rows where
  # inch_all is NA.
  filter(inch_all > 0) %>%
  mutate(lhincome = log(inch_all)) %>%
  dplyr::select(
    lfstat, female, educyr, edusq, lhincome,
    child, married, age, laborp, wbhao
  ) %>%
  filter(complete.cases(.))

# Check the data for any issues.
summary(cps1)
table(cps1$lfstat, cps1$laborp)
table(cps1$laborp, cps1$female)

# Linear Probability Model ----

ols1 <- lm(laborp ~ female, data = cps1)
summary(ols1)

ols2 <- lm(laborp ~ wbhao, data = cps1)
summary(ols2)

ols3 <- lm(
  laborp ~ age + educyr + married + child + lhincome + female + wbhao,
  data = cps1
)
summary(ols3)

stargazer(ols1, ols2, ols3, type = "text",
          title = "Comparison the OLS regressions")

# Probit Regression Model ----

probit <- glm(
  laborp ~ age + educyr + married + child + lhincome + female + wbhao,
  data = cps1,
  family = binomial(link = "probit")
)
summary(probit)

# Marginal effects for the probit model. probitmfx() is used here so the
# marginal effects correspond to the probit fit above (logitmfx() would
# report logit marginal effects instead).
probitmfx(
  laborp ~ age + educyr + married + child + lhincome + female + wbhao,
  data = cps1
)

# Logistic Regression Model ----

logit <- glm(
  laborp ~ age + educyr + married + child + lhincome + female + wbhao,
  data = cps1,
  family = binomial
)
summary(logit)

# Marginal effects for the logit model.
logitmfx(
  laborp ~ age + educyr + married + child + lhincome + female + wbhao,
  data = cps1
)

# Side-by-side comparison of the LPM, probit, and logit estimates.
stargazer(ols3, probit, logit, type = "text")