###############################################################################
# How to the use the function "AICW()"                                        #
#                                                                             #
# Use (if possible) a matrix rather than a dataframe.                         #
#                                                                             #
# Type AICW(matrix, response, covariates, model, nonparam), where "response"  #
# denotes the responsecolumn and "covariates" is a vector of the covariate-   #
# columns. Choose between a linear regression model (model="lm") and a logist.#
# regression model (model="logistic"). Choose nonparam=FALSE if you want      #
# do not want to estimate the weights for AICW nonparametrically              # 
# Note, that this function only considers a MAR Missingness process           #
###############################################################################

# the function needs the following libraries   : mgcv
# the illustration need the following libraries: ---

###################
# An illustration #
###################

# Create Datamatrix (without any missing value)
n<-500

var1   <- runif(n,0,10)
var2   <- rpois(n,10)
var3   <- rexp(n,3)
var4   <- rbinom(n,1,0.2)
mu     <- -4 + 1*var1 - 1.5*var4    # we assume that our responses y1 an y2 depend on var1 and var4
sigma  <- 2
probab <- 1/(1+exp(-mu))
y1 <- rnorm(n, mu, sigma)     
y2 <- rbinom(n,1, probab)

# Set some data to NA
# Mechanism: MAR, as missing values are only in the covariates
# and do only depend on y

# Probablility of missingness
probab <-  1-(1/(0.0005*(y1-40)^2+1))        # missing -> var1 + var2
var1mis <- var1
var2mis <- var2
var3mis <- var3
var4mis <- var4
for(k in 1:n){var1mis[k] <- sample(c(var1[k],NA),1,prob=c(1-probab[k],probab[k]))}
for(k in 1:n){var2mis[k] <- sample(c(var2[k],NA),1,prob=c(1-probab[k],probab[k]))}


# Matrix with missing values in a MAR-setting
myexample <- cbind(y1,y2,var1mis,var2mis,var3mis,var4mis)
colnames(myexample)<-c("y1","y2","x1","x2","x3","x4")


###################################
# Example 1  - linear Regression  #
###################################

## Consider the following three competing models

M1 <- lm(y1~var1)
M2 <- lm(y1~var1+var4)
M3 <- lm(y1~var1+var2+var3+var4)

# Consider AIC for Complete Cases
AIC(M1)
AIC(M2)
AIC(M3)

### Calculate the AICW (based on nonparametrically estimated weights)  

# Model 1: Response is the first column, covariate is in sthird column
AICW(myexample,response=1,covariates=3,model="lm", nonparam="TRUE")
# Model 2: Response is the first column, covariates are in third and sixth column
AICW(myexample,response=1,covariates=c(3,6),model="lm", nonparam="TRUE")
# Model 3: Response is the first column, covariates are in third, fourth, fifth and sixth column
AICW(myexample,response=1,covariates=c(3,4,5,6),model="lm", nonparam="TRUE")

### Calculate the AICW (based on parametrically estimated weights)  

# Model 1: Response is the first column, covariate is in third column
AICW(myexample,response=1,covariates=3,model="lm", nonparam="FALSE")
# Model 2: Response is the first column, covariates are in third and sixth column
AICW(myexample,response=1,covariates=c(3,6),model="lm", nonparam="FALSE")
# Model 3: Response is the first column, covariates are in third, fourth, fifth and sixth column
AICW(myexample,response=1,covariates=c(3,4,5,6),model="lm", nonparam="FALSE")


###################################
# Example 2 - logistic Regression #
###################################

## Consider the following three competing models

M1 <- glm(y2~var1, family="binomial")
M2 <- glm(y2~var1+var4, family="binomial")
M3 <- glm(y2~var1+var2+var3+var4, family="binomial")

# Consider AIC for Complete Cases
AIC(M1)
AIC(M2)
AIC(M3)

### Calculate the AICW (based on nonparametrically estimated weights)  

# Model 1: Response is the first column, covariate is in third column
AICW(myexample,response=2,covariates=3,model="logistic", nonparam="TRUE")
# Model 2: Response is the first column, covariates are in third and sixth column
AICW(myexample,response=2,covariates=c(3,6),model="logistic", nonparam="TRUE")
# Model 3: Response is the first column, covariates are in third, fourth, fifth and sixth column
AICW(myexample,response=2,covariates=c(3,4,5,6),model="logistic", nonparam="TRUE")

### Calculate the AICW (based on nonparametrically estimated weights)  

# Model 1: Response is the first column, covariate is in third column
AICW(myexample,response=2,covariates=3,model="logistic", nonparam="FALSE")
# Model 2: Response is the first column, covariates are in third and sixth column
AICW(myexample,response=2,covariates=c(3,6),model="logistic", nonparam="FALSE")
# Model 3: Response is the first column, covariates are in third, fourth, fifth and sixth column
AICW(myexample,response=2,covariates=c(3,4,5,6),model="logistic", nonparam="FALSE")
