Code for performing Multiple Imputation for External Calibration as in: Guo, Y., Little, R. J., & McConnell, D. S. (2012). On using summary statistics from an external calibration sample to correct for covariate measurement error. Epidemiology, 23(1), 165-74. doi:10.1097/EDE.0b013e31823a4386
## On Using Summary Statistics from an External Calibration Sample to Correct
## for Covariate Measurement Error
## Supplementary Materials
## This supplementary material presents the R source code to perform the MI-EC algorithm
## we describe in the main article.
#Calculate the summary statistics\\
calsummstat <-function(inputdata){
x=inputdata[,1]
w=inputdata[,2]
n=nrow(inputdata)
xbar=mean(x)
wbar=mean(w)
xx=sum(x^2)/n
ww=sum(w^2)/n
xy=sum(x*w)/n
sxx=xx-xbar*xbar
sww=ww-wbar*wbar
sxy=xy-xbar*wbar
b0=xbar-(sxy/sww)*wbar
b1=sxy/sww
sigmasq=sxx-sxy*sxy/sww
param=c(b0,b1,sigmasq,sxx,sww,sxy,xbar,wbar)
return(param)
}
#Draw parameters from their predictive distribution based on the calibration data \\
generateRandomdDrawofMEMParam <- function(calibdata,n) {
ndraw=1
#Calculate the summary statistics of X and W
iniparam=calsummstat(calibdata)
betahat0=iniparam[1]
betahat1=iniparam[2]
rss=n*iniparam[3]
sxx=iniparam[4]
sigmasq=rss/rchisq(ndraw,(n-2))
tmp0=rnorm(1)
beta0=betahat0 + sqrt(sigmasq/n)*tmp0
tmp1=rnorm(1)
beta1=betahat1 + sqrt(sigmasq/(n*sxx))*tmp1
param=c(beta0, beta1, sigmasq)
return(param)
}
#Draw parameters from their predictive distribution based on the main study data. \\
generateRandomMultivarRegreParam <- function(maindata,n,p) {
ndraw=1
w=maindata[,1]
wmat=mat.or.vec(n,2)
wmat[,1]=1
wmat[,2]=w
umat=maindata[,2:(p+1)]
ww=solve(t(wmat)%*%wmat)
coeffhat=ww%*%(t(wmat)%*%umat)
residual=umat-wmat%*%coeffhat
rss=t(residual)%*%residual
a=t(chol(ww))
invrss=solve(rss) #calculate inverse covariance matrix
#Draw covariance of U given W from inverse
#wishart distribution with df = (n-(k+p)+1)
#where k is dimension of wmat and p is dimension of umat
df=n-(p+2)+1
covmatrix=riwish(df, rss)
covmatrix=as.matrix(covmatrix)
betavar= kronecker(covmatrix, ww)
vec.coeffhat=as.vector(coeffhat)
beta = mvrnorm(ndraw, vec.coeffhat, betavar)
#regression coefficients of U on W
multivarRegrecoeff=matrix(beta,2,p)
#residual covariance matrix of U given W
multivarResidcov=covmatrix
param=rbind(multivarRegrecoeff,multivarResidcov)
return(param)
}
#generate parameter of the distribution of surrogate W from the posterior distribution \\
generateErrorvarParam <- function(calibdata,maindata,NCALIB,NSAMPLE){
#combine the information about W from calibration data
#and main study data
w=c(calibdata[,2],maindata[,1])
n=NCALIB+NSAMPLE
ndraw = 1
#Draw sigmaxsq from inverse chi-square distribution
#with df = (n-2)
muw = mean(w)
rss = sum((w-muw)^2)
sigmaxsq = rss/rchisq(ndraw,(n-1))
#Draw mux from normal distribution
tmp = rnorm(1)
mu = muw + sqrt(sigmaxsq/n)*tmp
param = c(mu, sigmaxsq) # two parameters: mean and variance
return(param)
}
#create the initinal mean and covariance matrix \\
createMatrixonW <- function(memParam, dmParam,wParam, P){
m = P+2+1
matrixonW=matrix(NA, nrow = m, ncol = m)
matrixonW[1,1] = -(1 + wParam[1]^2/wParam[2])
matrixonW[1,2] = wParam[1]/wParam[2]
matrixonW[2,2] = - 1/wParam[2]
matrixonW[1:(m-1),3:(m-1)]=dmParam
matrixonW[1:2,m] = memParam[1:2]
matrixonW[m,m] = memParam[3]
matrixonW[3:(m-1),m] = dmParam[2,] * memParam[3] / memParam[2]
return(matrixonW)
}
#complete initinal mean and covariance matrix \\
completeGmatrix <- function(G){
size = nrow(G)
for (i in 1:size){
for (j in 1:size){
if ( is.na(G[i,j]) ) {
if ( is.na(G[j,i]) == FALSE ) {
G[i,j] = G[j,i]
} else {
print(sprintf("ERROR: both elements at [%d,%d] and [%d,%d] are null", i, j, j, i));
}
}
}
}
if (isSymmetric(G)) {
return(G)
} else {
return(NULL)
}
}
#check the symmetry of the matrix created by the sweep operator
isSymmetric <- function(G){
size = nrow(G)
for (i in 1:size){
for (j in 1:size){
if ( abs(G[i, j] - G[j, i]) > 1e-10) {
print(sprintf("ERROR: elements not matched at [%d,%d] and [%d,%d]", i, j, j, i));
return(FALSE);
}
}
}
return(TRUE);
}
#perform the sweep operator \\
sweep <- function(matrixonW){
size = nrow(matrixonW)
curH = completeGmatrix(matrixonW)
newH= matrix(NA, nrow=size, ncol=size)
for (k in 3:(size-1)){
for (i in 1:size){
for (j in 1:size){
if (i==k && j==k) {
newH[i,j] = -1/curH[k,k]
} else if (i==k || j==k) {
newH[i,j] = curH[i,j]/curH[k,k]
} else {
newH[i,j] = curH[i,j] - curH[i,k]*curH[k,j]/curH[k,k]
}
}
}
curH = newH;
newH= matrix(NA, nrow=size, ncol=size)
}
param=curH[,size]
return(param)
}
#Create imputed values for unobserved covariate X
generateMissingvalue <- function(param, maindata){
nsample=nrow(maindata)
nparam=length(param)
#test whether the estimate of the residual variance is negative,
#and print a warning message
if (param[nparam] < 0) {
print(sprintf("%s", "Warning: The estimate of the
residual variance of mismeaured covariate given
the observed data is negative."))
}
mux = param[1]
for (i in 2:(nparam-1)){
mux = mux + param[i]*maindata[,i-1]
}
#Generate X from its posterior distribution with mean mux
#and variance sigmasqxx_wzy
imputedX = mux + sqrt(param[nparam])*rnorm(nsample, mean=0, sd=1)
return(imputedX)
}
#title function \\
printTitle <-function(){
print(sprintf("%s", "#######################################"))
print(sprintf("%s", "## Loading required package: MIEC ####"))
print(sprintf("%s", "#######################################"))
}
#main function \\
MIEC <- function(maindata,calibdata,NCALIB,NSAMPLE,M,N,K,S) {
require(MCMCpack)
#printTitle()
# Turn data.frames into matrices
maindata <- as.matrix(maindata)
calibdata <- as.matrix(calibdata)
MIimputedXbasedoncalib=c()
multipleImputedX=c()
twostageMIimputedX=c()
P=K+S
count = 0
while (count < M) {
#Step-1: draw parameters by regressing X on W based on
#measurement error model
memParam=generateRandomdDrawofMEMParam(calibdata,NCALIB)
#Step-2: draw parameters by regressing (Y, Z) on W based on main
#interested "disease" model
dmParam=generateRandomMultivarRegreParam(maindata,NSAMPLE,P)
#Step-3: generate draws of mean and variance of W
wParam=generateErrorvarParam(calibdata,maindata,NCALIB,NSAMPLE)
#Step-4: creating sweeping matrix on W by using parameters
#obtained from step 1-3 and filling estimated covariance
#parameter between U and X given W
sweepMatrixonW=createMatrixonW(memParam, dmParam,wParam, P)
#Step-5: calculate parameters of the imputation model for X
#given (W,Y,Z) by the sweep operator
impmodelParam = sweep(sweepMatrixonW)
#Step-6: generate random draw for unknown X from its posterior
#distribution, given W and Y
varIndicator=length(impmodelParam)
if (impmodelParam[varIndicator] >= 0){
for (n in 1:N){
secondStageDrawX = generateMissingvalue(impmodelParam, maindata)
twostageMIimputedX=cbind(twostageMIimputedX,secondStageDrawX)
}
count=count+1
}
}
#Output data with Y, Z, and multiply imputed X,
#where maindata[,P+1] = U(Y,Z)
twoStageMIimputeddata=cbind(maindata[,2:(P+1)], twostageMIimputedX)
return(twoStageMIimputeddata)
}