#################################################### # # SCRIPT TO COMPUTE AVERAGE PREDICTED PROBABILITY CHANGE # Location-sensitive Observed Values Approach (lOVA) # # --Please cite-- # Author: Bastian Becker # Title: Behind the Curve and Beyond... # Journal: Metodoloski Zvezki - Advances in Methodology and Statistics # # Contact: bstnbckr@gmail.com # Website: www.beckerbastian.net # Twitter: @bstnbckr # #################################################### install.packages("mvtnorm") ## Compute average predicted probability changes for binary outcomes (Simulation-based bootstraping) ## Computes probability changes for changes in X (Interquartile differences, Standard deviation differences, First differences) ## Return results based on lOVA and OVA approach # load the following function and execute it with the following three objects # FRML: Model specification, e.g. "vote.trump ~ age + income + race" # DT: Your data, no missingness on variables included in model allowed # CF: Variable for which counterfactuals are to be computed, e.g. "income" # CONF: Level of confidence to construct confidence interval, i.e. .99 for 99%-confidence interval logit.lOVA<-function(FRML, DT, CF, CONF=.95){ # load library library(mvtnorm) # define inverse logit invlog<-function(x){exp(x)/(1+exp(x))} # set alpha if(is.na(CONF)){CONF=.05}else{ CONF<-round(CONF,2) CONF<-min(CONF,.99) CONF<-max(CONF,.01) } # estimate model logit_out <- glm(as.formula(FRML), data = DT, family = binomial(link = "logit")) # preparation prediction DT <- model.matrix(logit_out) DT.cf1 <- DT DT.cf2 <- DT x.iq1<-quantile(DT[,CF], probs = .25) x.iq2<-quantile(DT[,CF], probs = .75) x.sd<-sd(DT[,CF]) x.mean<-mean(DT[,CF]) pred.lova.iq<-list(); pred.lova.sd<-list(); pred.lova.fd<-list() pred.ova.iq<-list(); pred.ova.sd<-list(); pred.ova.fd<-list(); for(b in 1:5000){ beta<-rmvnorm(1,coef(logit_out),vcov(logit_out)) # lOVA DT.cf1[, CF]<- DT[, CF] - .5*(x.iq2-x.iq1) DT.cf2[, CF]<- DT[, CF] + .5*(x.iq2-x.iq1) pred.lova.iq[[b]]<-mean( invlog(DT.cf2%*%t(beta)) - invlog(DT.cf1%*%t(beta)) ) DT.cf1[, CF]<- DT[, CF] - .5*x.sd DT.cf2[, CF]<- DT[, CF] + .5*x.sd pred.lova.sd[[b]]<-mean( invlog(DT.cf2%*%t(beta)) - invlog(DT.cf1%*%t(beta)) ) DT.cf1[, CF]<- DT[, CF] - .5 DT.cf2[, CF]<- DT[, CF] + .5 pred.lova.fd[[b]]<-mean( invlog(DT.cf2%*%t(beta)) - invlog(DT.cf1%*%t(beta)) ) # OVA DT.cf1[, CF]<-x.iq1 DT.cf2[, CF]<-x.iq2 pred.ova.iq[[b]]<-mean( invlog(DT.cf2%*%t(beta)) - invlog(DT.cf1%*%t(beta)) ) DT.cf1[, CF]<-x.mean DT.cf2[, CF]<-x.mean+x.sd pred.ova.sd[[b]]<-mean( invlog(DT.cf2%*%t(beta)) - invlog(DT.cf1%*%t(beta)) ) DT.cf1[, CF]<-x.mean DT.cf2[, CF]<-x.mean+1 pred.ova.fd[[b]]<-mean( invlog(DT.cf2%*%t(beta)) - invlog(DT.cf1%*%t(beta)) ) } # Prediction summary pred.lova.iq<-unlist(pred.lova.iq); pred.lova.sd<-unlist(pred.lova.sd); pred.lova.fd<-unlist(pred.lova.fd) pred.ova.iq<-unlist(pred.ova.iq); pred.ova.sd<-unlist(pred.ova.sd); pred.ova.fd<-unlist(pred.ova.fd); avg.lova <-round(c(mean(pred.lova.iq),mean(pred.lova.sd),mean(pred.lova.fd)),5) avg.ova <-round(c(mean(pred.ova.iq),mean(pred.ova.sd),mean(pred.ova.fd)),5) se.lova <-round(c(sd(pred.lova.iq),sd(pred.lova.sd),sd(pred.lova.fd)),5) se.ova <-round(c(sd(pred.ova.iq),sd(pred.ova.sd),sd(pred.ova.fd)),5) ci.p <- c((1-CONF)/2,1-(1-CONF)/2) ci.lova <-round(rbind(quantile(pred.lova.iq,probs=ci.p),quantile(pred.lova.sd,probs=ci.p),quantile(pred.lova.fd,probs=ci.p)),5) ci.ova <-round(rbind(quantile(pred.ova.iq,probs=ci.p),quantile(pred.ova.sd,probs=ci.p),quantile(pred.ova.fd,probs=ci.p)),5) ci.lova<-paste("[",ci.lova[,1],", ", ci.lova[,2],"]", sep="") ci.ova<-paste("[",ci.ova[,1],", ", ci.ova[,2],"]", sep="") # Output table.out<-cbind(avg.lova,se.lova,ci.lova,avg.ova,se.ova,ci.ova) colnames(table.out)<-c("Mean (lOVA)","S.E.",paste("CI-",CONF*100,"%",sep="") ,"Mean (OVA)","S.E.",paste("CI-",CONF*100,"%",sep="")) rownames(table.out)<-c("Interquartile", "Standard deviation", "First difference") return(table.out) }