require(pomp)
require(plyr)
require(reshape2)
require(ggplot2)
require(grid)
require(scales)
theme_set(theme_bw())
options(stringsAsFactors=FALSE)
stopifnot(packageVersion("pomp")>="0.58-1")
Set up some facilities for doing parallel computation on a single multi-core machine. Package doMPI
is an alternative for use on clusters.
require(foreach)
require(doMC)
require(iterators)
registerDoMC(cores=4)
set.seed(1860853338,kind="L'Ecuyer")
A little function to save doing unnecessary repeats.
juliaChild <- function (file, expr) {
if (file.exists(file)) {
readRDS(file)
} else {
val <- eval(expr)
saveRDS(val,file=file)
val
}
}
Load the Ebola data and construct a pomp
object holding the data and a simple SEIR model for it.
baseurl <- "http://kinglab.eeb.lsa.umich.edu/ICTPWID/SaoPaulo_2015/"
source(paste0(baseurl,"data/ebola.R"))
Plot the data.
require(plyr)
require(ggplot2)
require(reshape2)
source(paste0(baseurl,"data/eboladata.R"))
wa <- ddply(dat,~date,summarize,cases=sum(cases,na.rm=TRUE),country="WestAfrica")
dat <- join(dat,wa,type='full')
ggplot(data=dat,mapping=aes(x=date,y=cases,group=country))+geom_line(color='blue')+
facet_grid(country~.,scales='free_y')
Plot cumulative case counts.
ggplot(data=ddply(dat,~country,mutate,cases=cumsum(cases)),
mapping=aes(x=date,y=cases,group=country))+geom_line(color='red')+
labs(y="cumulative cases")+
facet_grid(country~.,scales='free_y')
juliaChild("tm-fit.rds",{
starts <- profileDesign(R0=seq(1,2,length=20),
upper=c(k=1,rho=0.999),
lower=c(k=1e-8,rho=0.001),
nprof=10)
profiles <- foreach (start=iter(starts,by='row'),
.combine=rbind,.inorder=FALSE,
.noexport=c("ebolaModel"),
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=1,seed=2086168278L,info=TRUE)) %:%
foreach (type=c("raw","cum"),.combine=rbind,.inorder=FALSE) %dopar%
{
tm <- ebolaModel(country="SierraLeone",type=type,na.rm=TRUE)
coef(tm,names(start)) <- unname(unlist(start))
if (coef(tm,"rho")==1) coef(tm,"rho") <- 0.999
if (coef(tm,"rho")==0) coef(tm,"rho") <- 0.001
tm <- traj.match(tm,est=c("k","rho","E_0","I_0"),transform=TRUE)
if (coef(tm,"rho")==1) coef(tm,"rho") <- 0.999
if (coef(tm,"rho")==0) coef(tm,"rho") <- 0.001
tm <- traj.match(tm,method='subplex',control=list(maxit=1e5))
pompUnload(tm)
data.frame(country="SierraLeone",type=type,as.list(coef(tm)),
loglik=logLik(tm),conv=tm$convergence)
}
}) -> profiles
require(plyr)
require(reshape2)
require(ggplot2)
theme_set(theme_bw())
profiles$mod <- "det"
profiles <- subset(profiles,country=="SierraLeone"&(mod=="det"|type=="raw"))
profiles <- ddply(profiles,~type+mod,mutate,dll=loglik-max(loglik))
ggplot(data=ddply(profiles,~R0+type+mod,subset,loglik==max(loglik)&dll>-10),
mapping=aes(x=R0,y=dll,group=interaction(type,mod),
color=interaction(type,mod),fill=interaction(type,mod)))+
geom_point(alpha=0.5)+geom_smooth(method='gam',formula=y~s(x),alpha=0.2)+
scale_color_manual(name="",values=c(raw.stoch='blue',cum.det='red',cum.stoch='green',raw.det='goldenrod'),
labels=c(raw.stoch="stochastic, raw",cum.det="deterministic, cumulative",
raw.det="deterministic, raw",cum.stoch="stochastic, cumulative"))+
scale_fill_manual(name="",values=c(raw.stoch='blue',cum.det='red',cum.stoch='green',raw.det='goldenrod'),
labels=c(raw.stoch="stochastic, raw",cum.det="deterministic, cumulative",
raw.det="deterministic, raw",cum.stoch="stochastic, cumulative"))+
geom_hline(yintercept=-0.5*qchisq(p=0.95,df=1))+
labs(x=expression("basic reproduction number,"~R[0]),y="log scaled likelihood")+
expand_limits(x=c(1,2))+ylim(c(-10,1))+
theme(strip.background=element_rect(color=NA,fill=NA),
text=element_text(size=15),
strip.text=element_text(face='bold'),
panel.grid.minor=element_line(color=gray(0.9)),
legend.position=c(0.8,0.8))
nsims <- 50
po <- ebolaModel(country="Guinea",timestep=0.1)
params <- parmat(coef(po),3)
params["k",] <- c(0,0.2,0.5)
paramnames <- names(coef(po))
params
## [,1] [,2] [,3]
## N 1.062897e+07 1.062897e+07 1.062897e+07
## R0 1.400000e+00 1.400000e+00 1.400000e+00
## alpha 6.786632e-01 6.786632e-01 6.786632e-01
## gamma 1.053605e+00 1.053605e+00 1.053605e+00
## rho 2.000000e-01 2.000000e-01 2.000000e-01
## cfr 7.000000e-01 7.000000e-01 7.000000e-01
## k 0.000000e+00 2.000000e-01 5.000000e-01
## S_0 9.999991e-01 9.999991e-01 9.999991e-01
## E_0 4.654124e-07 4.654124e-07 4.654124e-07
## I_0 4.654124e-07 4.654124e-07 4.654124e-07
## R_0 1.000000e-08 1.000000e-08 1.000000e-08
juliaChild("sims.rds",{
simdat <- simulate(po,params=params,nsim=nsims,seed=208335746L,
as.data.frame=TRUE,obs=TRUE)
rename(simdat,c(time="week")) -> simdat
mutate(simdat,k=params["k",((as.integer(sim)-1)%%ncol(params))+1])
}) -> simdat
subset(simdat,as.integer(sim)<=12) -> dat
ddply(dat,~k,mutate,sim=as.integer(factor(sim))) -> dat
ggplot(data=dat,
mapping=aes(x=week,y=cases,group=sim))+
geom_line()+
facet_grid(sim~k,
labeller=labeller(
sim=label_value,
k=label_bquote(k==.(x))))+
theme(strip.background=element_rect(color=NA,fill=NA),
text=element_text(size=15),
strip.text=element_text(face='plain'),
panel.grid.minor=element_line(color=gray(0.9)),
axis.text.y=element_blank(),
axis.text.x=element_text(size=rel(0.6)))
juliaChild("tm-sim-profiles-R0.rds",{
fits <- foreach(simul=1:nsims,
.combine=rbind,.inorder=FALSE,
.noexport=c("ebolaModel"),
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=10,info=TRUE)) %:%
foreach(type=c("raw","cum"),.combine=rbind,.inorder=FALSE) %dopar%
{
dat <- subset(simdat,sim==simul,select=c(week,cases,deaths))
tm <- ebolaModel(country="Guinea",data=dat,type=as.character(type))
st <- params[,(simul-1)%%3+1]
true.k <- unname(st["k"])
true.R0 <- unname(st["R0"])
true.rho <- unname(st["rho"])
st["k"] <- st["k"]+1e-6
tm <- traj.match(tm,start=st,est=c("R0","k","rho"),transform=TRUE)
if (coef(tm,"rho")==1) coef(tm,"rho") <- 0.999
if (coef(tm,"rho")==0) coef(tm,"rho") <- 0.001
tm <- traj.match(tm,est=c("R0","k","rho"),method='subplex',transform=TRUE)
pompUnload(tm)
data.frame(sim=simul,type=as.character(type),
true.k=true.k,true.R0=true.R0,true.rho=true.rho,
as.list(coef(tm)),loglik=logLik(tm),
conv=tm$convergence)
}
profiles <- foreach (fit=iter(fits,by="row"),
.noexport=c("ebolaModel"),
.combine=rbind,.inorder=FALSE) %:%
foreach (r0=seq(from=0.7,to=3,length=200),
.combine=rbind,.inorder=FALSE,
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=200)
) %dopar%
{
dat <- subset(simdat,sim==fit$sim,select=c(week,cases,deaths))
tm <- ebolaModel(country="Guinea",data=dat,type=as.character(fit$type))
coef(tm) <- unlist(fit[paramnames])
coef(tm,"R0") <- r0
if (coef(tm,"rho")==1) coef(tm,"rho") <- 0.999
if (coef(tm,"rho")==0) coef(tm,"rho") <- 0.001
tm <- traj.match(tm,est=c("k","rho"),transform=TRUE)
if (coef(tm,"rho")==1) coef(tm,"rho") <- 0.999
if (coef(tm,"rho")==0) coef(tm,"rho") <- 0.001
tm <- traj.match(tm,est=c("k","rho"),transform=TRUE,method='subplex')
pompUnload(tm)
data.frame(sim=fit$sim,type=fit$type,
true.k=fit$true.k,true.R0=fit$true.R0,true.rho=fit$true.rho,
as.list(coef(tm)),loglik=logLik(tm),
conv=tm$convergence)
}
}) -> profiles
juliaChild("tm-sim-fits.rds",{
ddply(profiles,~type+sim+true.k,subset,loglik==max(loglik)) -> starts
fits <- foreach(fit=iter(starts,by="row"),.combine=rbind,.inorder=FALSE,
.noexport=c("ebolaModel"),
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=30)) %dopar%
{
dat <- subset(simdat,sim==fit$sim,select=c(week,cases,deaths))
tm <- ebolaModel(country="Guinea",data=dat,type=as.character(fit$type))
coef(tm) <- unlist(fit[paramnames])
if (coef(tm,"rho")==1) coef(tm,"rho") <- 0.999
if (coef(tm,"rho")==0) coef(tm,"rho") <- 0.001
tm <- traj.match(tm,est=c("k","rho"),transform=TRUE)
if (coef(tm,"rho")==1) coef(tm,"rho") <- 0.999
if (coef(tm,"rho")==0) coef(tm,"rho") <- 0.001
tm <- traj.match(tm,est=c("k","rho"),transform=TRUE,method='subplex')
pompUnload(tm)
data.frame(sim=fit$sim,type=fit$type,
true.k=fit$true.k,true.R0=fit$true.R0,true.rho=fit$true.rho,
as.list(coef(tm)),loglik=logLik(tm),
conv=tm$convergence)
}
}) -> fits
pval <- 0.99
ddply(profiles,
~type+sim+true.k+true.R0+true.rho,
summarize,
maxloglik=max(loglik),
crit=maxloglik-0.5*qchisq(df=1,p=pval), ## critical LR
R0lo=min(R0[loglik>=crit]), ## CI, lower bound
R0hi=max(R0[loglik>=crit]), ## CI, upper bound
R0.CIwidth=R0hi-R0lo, ## width of CI
R0cov=(true.R0>R0lo)&&(true.R0<R0hi) ## truth in CI?
) -> profSummary
ddply(profSummary,
~true.k+type,
summarize,
R0cov.min=qbeta(p=0.025,
shape1=sum(R0cov)+0.5,
shape2=sum(!R0cov)+0.5),
R0cov.max=qbeta(p=0.975,
shape1=sum(R0cov)+0.5,
shape2=sum(!R0cov)+0.5),
R0cov=sum(R0cov)/length(R0cov)
) -> cover
R0comp <- ggplot(data=fits,
mapping=aes(x=factor(true.k),y=R0,
group=interaction(true.k,type),
fill=type)
)+
stat_boxplot(notch=FALSE,coef=1.5,color='black')+
geom_hline(aes(yintercept=true.R0),linetype=2)+
theme_classic()+
coord_cartesian(ylim=c(1,1.6))+
scale_fill_manual(values=c(raw='blue',cum='red'),
labels=c("raw","cumulative"))+
guides(fill=FALSE,color=FALSE)+
labs(x=expression("error overdispersion,"~k),y=expression("estimated"~R[0]))
kcomp <- ggplot(data=fits,
mapping=aes(x=factor(true.k),y=k,
group=interaction(true.k,type),
fill=type)
)+
stat_boxplot(notch=FALSE,coef=1.5,color='black')+
annotate("segment",
x=(1:3)-0.4,xend=(1:3)+0.4,
y=c(0,0.2,0.5),yend=c(0,0.2,0.5),
linetype=2)+
coord_cartesian(ylim=c(-0.1,1.05))+
theme_classic()+
scale_fill_manual(values=c(raw='blue',cum='red'),
labels=c(raw="raw",cum="cumulative"))+
labs(x=expression("error overdispersion,"~k),y=expression("estimated"~k),
fill="Data used:")+
theme(legend.position=c(0.5,0.8))
R0ci <- ggplot(data=profSummary,
mapping=aes(x=factor(true.k),y=R0.CIwidth,
group=interaction(true.k,type),
fill=type)
)+
stat_boxplot(notch=FALSE,coef=1.5,color='black')+
coord_cartesian(ylim=c(0,0.4))+
labs(x=expression("error overdispersion,"~k),
y=expression(R[0]~"CI width"))+
theme_classic()+
scale_fill_manual(values=c(raw='blue',cum='red'))+
guides(fill=FALSE,color=FALSE)
R0cov <- ggplot(data=cover,
mapping=aes(x=factor(true.k),y=R0cov,
ymin=R0cov.min,ymax=R0cov.max,
color=type))+
geom_errorbar(size=1,width=0.1)+geom_point(size=2)+
geom_hline(yintercept=pval,linetype=2)+
theme_classic()+
scale_color_manual(values=c(raw='blue',cum='red'))+
guides(color=FALSE)+
expand_limits(y=c(0,1))+
labs(x=expression("error overdispersion,"~k),
y=expression(R[0]~"coverage"))
juliaChild("mifs1.rds",{
starts <- sobolDesign(lower=c(R0=1,rho=0,k=0.01,E_0=0,I_0=0),
upper=c(R0=5,rho=1,k=1,E_0=1e-4,I_0=1e-4),
nseq=500)
foreach (ctry=c("SierraLeone","Liberia","Guinea","WestAfrica"),
.combine=rbind,.inorder=FALSE,
.noexport=c("ebolaModel"),
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=1,seed=762304675L,info=TRUE)
) %:%
foreach (type=c("raw","cum"),.combine=rbind,.inorder=FALSE) %:%
foreach (start=iter(starts,by='row'),
.combine=rbind,.inorder=FALSE) %dopar%
{
tic <- Sys.time()
po <- ebolaModel(country=ctry,type=type,na.rm=TRUE)
coef(po,names(start)) <- unname(unlist(start))
## First refine the initial conditions using trajectory matching or mif
mf <- mif(po,Nmif=20,
rw.sd = c(E_0=0.02,I_0=0.02),ivps=c("E_0","I_0"),
Np=2000,var.factor=2,method="mif2",
cooling.type="hyperbolic",cooling.fraction=0.1,
transform=TRUE,verbose=FALSE)
coef(po,c("S_0","E_0","I_0","R_0")) <- unname(coef(mf,c("S_0","E_0","I_0","R_0")))
mf <- mif(po, Nmif = 10,
rw.sd = c(R0=0.02,rho=0.02,k=0.02,E_0=0.02,I_0=0.02),
ivps = c("E_0","I_0"),
Np = 2000,
var.factor = 2,
method = "mif2",
cooling.type = "hyperbolic",
cooling.fraction = 0.5,
transform = TRUE,
verbose = FALSE)
mf <- continue(mf, Nmif = 50,
cooling.fraction = 0.1,
method = "mif2",
cooling.type = "hyperbolic")
## Runs 10 particle filters to assess Monte Carlo error in likelihood
pf <- replicate(10,pfilter(mf,Np=5000,max.fail=Inf))
ll <- sapply(pf,logLik)
ll <- logmeanexp(ll, se = TRUE)
nfail <- sapply(pf,getElement,"nfail")
toc <- Sys.time()
etime <- toc-tic
units(etime) <- "hours"
pompUnload(mf)
data.frame(country=ctry,type=type,as.list(coef(mf)),
loglik = ll[1],
loglik.se = ll[2],
nfail.min = min(nfail),
nfail.max = max(nfail),
etime = as.numeric(etime))
}
}) -> pars
juliaChild("profile1.rds",{
pars <- ddply(pars,~country+type,subset,loglik>max(loglik)-20)
pars <- subset(pars,is.finite(loglik)&nfail.max==0,
select=-c(loglik,loglik.se,nfail.max,nfail.min,etime))
pars <- melt(pars,id=c("country","type"),variable.name="parameter")
pars <- ddply(pars,~country+type+parameter,summarize,
min=min(value),max=max(value))
pars <- melt(pars,id=c("country","type","parameter"))
starts <- daply(pars,country~type~parameter,function(x)range(x$value))
starts <- adply(starts,c(1,2),function(x)
profileDesign(R0=seq(from=0.8*x["R0",1],to=1.2*x["R0",2],length=60),
lower=x[c("rho","k","S_0","E_0","I_0"),1],
upper=x[c("rho","k","S_0","E_0","I_0"),2],
nprof=100)
)
foreach (start=iter(starts,by='row'),
.combine=rbind,.inorder=FALSE,
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=1,seed=1264624821L),
.noexport=c("ebolaModel")) %dopar%
{
tic <- Sys.time()
ctry <- as.character(start$country)
type <- as.character(start$type)
st <- unlist(subset(start,select=-c(country,type)))
po <- ebolaModel(country=ctry,timestep=0.01,type=type,na.rm=TRUE,nstage=3L)
coef(po,names(st)) <- st
mf <- mif(po, Nmif=10,
rw.sd = c(rho=0.02,k=0.02,E_0=0.02,I_0=0.02),
ivps = c("E_0","I_0"),
Np = 2000,
var.factor = 2,
method = "mif2",
cooling.type = "hyperbolic",
cooling.fraction = 0.5,
transform = TRUE,
verbose = FALSE)
mf <- continue(mf, Nmif = 50, cooling.fraction = 0.1)
## Runs 10 particle filters to assess Monte Carlo error in likelihood
pf <- replicate(10,pfilter(mf,Np=5000,max.fail=Inf))
ll <- sapply(pf,logLik)
ll <- logmeanexp(ll, se = TRUE)
nfail <- sapply(pf,getElement,"nfail")
toc <- Sys.time()
etime <- toc-tic
units(etime) <- "hours"
## unload dynamical executable
pompUnload(po)
data.frame(country=ctry,type=type,as.list(coef(mf)),
loglik = ll[1],
loglik.se = ll[2],
nfail.min = min(nfail),
nfail.max = max(nfail),
etime = as.numeric(etime))
}
}) -> pars
juliaChild("profile2.rds",{
pars <- subset(pars,is.finite(loglik)&nfail.max==0)
pars <- ddply(pars,~country+type+R0,subset,rank(-loglik)<=5)
pars <- subset(pars,select=-c(loglik,loglik.se,nfail.max,nfail.min,etime))
results <- foreach (start=iter(pars,by='row'),
.combine=rbind,.inorder=FALSE,
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=1,seed=1264624821L),
.noexport=c("ebolaModel")) %dopar%
{
tic <- Sys.time()
ctry <- as.character(start$country)
type <- as.character(start$type)
st <- unlist(subset(start,select=-c(country,type)))
po <- ebolaModel(country=ctry,timestep=0.01,type=type,na.rm=TRUE,nstage=3L)
coef(po,names(st)) <- unname(st)
## Runs 10 particle filters to assess Monte Carlo error in likelihood
pf <- try(replicate(10,pfilter(po,Np=5000,max.fail=Inf)))
## unload dynamical executable
pompUnload(po)
toc <- Sys.time()
etime <- toc-tic
units(etime) <- "hours"
if (!inherits(pf,"try-error")) {
ll <- sapply(pf,logLik)
ll <- logmeanexp(ll, se = TRUE)
nfail <- sapply(pf,getElement,"nfail")
data.frame(country=ctry,type=type,as.list(coef(po)),
loglik = ll[1],
loglik.se = ll[2],
nfail.min = min(nfail),
nfail.max = max(nfail),
etime = as.numeric(etime))
} else {
data.frame(country=ctry,type=type,as.list(coef(po)),
loglik = NA,
loglik.se = NA,
nfail.min = NA,
nfail.max = NA,
etime = as.numeric(etime))
}
}
}) -> prof
prof <- ddply(prof,~country+type,mutate,dll=loglik-max(loglik),
country=factor(country,levels=c("Guinea","Liberia","SierraLeone","WestAfrica")))
prof <- ddply(prof,~country+type+R0,subset,loglik==max(loglik))
ggplot(data=prof,
mapping=aes(x=R0,y=dll,color=type,fill=type))+
facet_wrap(~country,scales="fixed",ncol=1)+
geom_point(alpha=0.5)+
geom_smooth(method='loess',alpha=0.2)+
scale_color_manual(name="",values=c(raw='blue',cum='red'),
labels=c(rawh="raw",cum="cumulative"))+
scale_fill_manual(name="",values=c(raw='blue',cum='red'),
labels=c(rawh="raw",cum="cumulative"))+
geom_hline(yintercept=-0.5*qchisq(p=0.95,df=1))+
labs(x = expression("basic reproduction number, "~R[0]),
y = "log scaled likelihood",
color = "Data used:", fill="Data used:")+
xlim(c(1,2.5))+ylim(c(-10,0))+
theme_classic()+
theme(strip.background=element_rect(color=NA,fill=NA),
text=element_text(size=14),
strip.text=element_text(face='bold'),
panel.grid.minor=element_line(color=gray(0.9)),
legend.position=c(0.75,0.89),
legend.key.size=unit(10,"pt"),
legend.text=element_text(size=rel(0.8))
)
mles <- ddply(prof,~country+type,subset,loglik==max(loglik)&type=="raw")
mles <- mutate(mles,odds=(1-rho)/rho)
mles <- subset(mles,select=-c(dll,loglik,nfail.min,nfail.max,S_0,E_0,I_0,R_0,alpha,gamma,cfr))
kable(mles)
country | type | N | R0 | rho | k | loglik.se | etime | odds |
---|---|---|---|---|---|---|---|---|
Guinea | raw | 10628972 | 1.217663 | 0.0144752 | 0.3822749 | 0.0110781 | 0.0706478 | 68.083447 |
Liberia | raw | 4092310 | 1.939128 | 0.0234293 | 0.2357219 | 0.0056410 | 0.0311655 | 41.681607 |
SierraLeone | raw | 6190280 | 1.370597 | 0.1235683 | 0.0761270 | 0.0130909 | 0.0305334 | 7.092691 |
WestAfrica | raw | 20911562 | 1.477556 | 0.0061551 | 0.1891024 | 0.0147498 | 0.0653890 | 161.467758 |
mles <- ddply(prof,~country+type,subset,loglik==max(loglik))
mles <- subset(mles,type=="raw")
time1 <- c(Guinea="2014-01-05",Liberia="2014-06-01",
SierraLeone="2014-06-08",WestAfrica="2014-01-05")
juliaChild("diagnostics-sim.rds",{
sims <- foreach (mle=iter(mles,by='row'),.combine=rbind,
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=1,seed=1264624821L)
) %dopar%
{
country=as.character(mle$country)
type=as.character(mle$type)
M <- ebolaModel(country=country,type=type,
na.rm=TRUE,nstage=3,timestep=0.01)
p <- unlist(subset(mle,select=-c(country,type,
loglik,loglik.se,
nfail.min,nfail.max,etime)))
coef(M,names(p)) <- unname(unlist(p))
sim <- simulate(M,nsim=10,as.data.frame=TRUE,obs=TRUE,include.data=TRUE,
seed=2186L)
pompUnload(M)
sim$date <- as.Date(time1[country])+sim$time*7
sim$country <- country
sim$type <- type
sim
}
mutate(sims,
data=ifelse(sim=="data","data","simulation"),
country=mapvalues(country,
from=c("SierraLeone","WestAfrica"),
to=c("Sierra Leone","West Africa"))
)
}) -> sim
juliaChild("diagnostics-probes.rds",{
pbs <- foreach (mle=iter(mles,by='row'),.combine=rbind,
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=1,seed=1264624821L)) %dopar%
{
country=as.character(mle$country)
type=as.character(mle$type)
M <- ebolaModel(country=country,type=type,
na.rm=TRUE,nstage=3,timestep=0.01)
p <- unlist(subset(mle,select=-c(country,type,
loglik,loglik.se,
nfail.min,nfail.max,etime)))
coef(M,names(p)) <- unname(unlist(p))
pb <- probe(M,probes=list(probe.acf(var="cases",lags=1,type="correlation")),
nsim=500,seed=1878812716L)
pb <- as.data.frame(pb)
pb <- mutate(pb,
sim=rownames(pb),
data=ifelse(sim=="data","data","simulation"),
type=type,
country=country)
pb
}
mutate(pbs,
country=mapvalues(country,
from=c("SierraLeone","WestAfrica"),
to=c("Sierra Leone","West Africa")))
}) -> pbs1
Among these is exponential growth rate
probe.trend <- function (var) {
if (length(var) > 1) stop(sQuote("probe.trend")," is a univariate probe")
function (y) {
cases <- as.numeric(y[var, ])
df <- data.frame(week = 1:length(cases), cases = cases)
M <- lm(log(1 + cases) ~ week, data = df)
as.numeric(coef(M)[2])
}
}
juliaChild("diagnostics-addl-probes.rds",{
pbs <- foreach (mle=iter(mles,by='row'),.combine=rbind,
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=1,seed=1264624821L)) %dopar%
{
country=as.character(mle$country)
type=as.character(mle$type)
M <- ebolaModel(country=country,type=type,
na.rm=TRUE,nstage=3,timestep=0.01)
p <- unlist(subset(mle,select=-c(country,type,
loglik,loglik.se,
nfail.min,nfail.max,etime)))
coef(M,names(p)) <- unname(unlist(p))
pb <- probe(M,
probes=list(sd=probe.sd(var="cases"),
probe.quantile(var="cases",prob=c(0.9)),
probe.acf(var="cases",lags=c(1,2,3),type="correlation"),
trend=probe.trend(var="cases")),
nsim=2000,seed=2186L
)
mutate(as.data.frame(pb),
sim=rownames(pb),
kind=ifelse(sim=="data","data","simulation"),
type=type,
country=country)
}
pbs <- melt(pbs,id=c("country","type","kind","sim"),variable.name="probe")
pbs <- mutate(pbs,
country=mapvalues(country,
from=c("SierraLeone","WestAfrica"),
to=c("Sierra Leone","West Africa")),
probe=mapvalues(probe,
from=c("sd","90%","acf.1.cases","acf.2.cases","acf.3.cases","trend"),
to=c("SD","90%ile","ACF(1)","ACF(2)","ACF(3)","TREND")))
arrange(pbs,country,type,probe,kind,sim)
}) -> probes
ddply(pbs1,~country+type,summarize,
k=sum(acf.1.cases[data=="simulation"]>acf.1.cases[data=="data"]),
n=length(acf.1.cases[data=="simulation"]),
pval=(k+0.5)/(n+1)) -> pvals
nsim <- length(unique(sim$sim))-1
npsim <- length(unique(pbs1$sim))-1
mainpl <- ggplot(data=sim,
mapping=aes(x=date,y=cases,group=sim,color=data,size=data))+
geom_line()+
scale_color_manual(values=c(simulation=gray(0.6),data='blue'))+
scale_size_manual(values=c(simulation=0.6,data=1.2))+
facet_wrap(~country,scales='free')+
labs(color=NA,y="weekly cases",x="date")+
guides(color=guide_legend(title=NULL),size=guide_legend(title=NULL))+
theme_classic()+
theme(strip.background=element_rect(color=NA,fill=NA),
text=element_text(size=15),
strip.text=element_text(face='bold'),
panel.grid.minor=element_line(color=gray(0.9)),
legend.position=c(0.5,1.05),
legend.background=element_rect(fill=NA),
legend.direction='horizontal'
)
hists <- dlply(pbs1,~country,
function(x) {
pval <- subset(pvals,country==unique(x$country))$pval
ggplot(data=subset(x,data=="simulation"),
mapping=aes(x=acf.1.cases))+
geom_histogram(binwidth=0.05,color='black',fill=gray(0.8),
mapping=aes(y=..density..))+
expand_limits(x=c(0,1),y=c(0,4))+
labs(x="ACF(1)",y="density")+
geom_vline(color='blue',data=subset(x,data=="data"),size=1.2,
mapping=aes(xintercept=acf.1.cases))+
annotate("text",x=0,y=3.5,hjust=0,
label=paste0("P==",signif(pval,2)),parse=TRUE,size=3)+
theme_bw()+
theme(text=element_text(size=8),
plot.background=element_rect(fill=NA,color=NA),
panel.border=element_blank(),
axis.line.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.ticks.margin=unit(0,"lines"),
panel.grid=element_blank(),
plot.margin=unit(c(1,1,0.5,0.5),"lines"))
}
)
ggplot(data=subset(probes,type=="raw"&kind=="simulation"),
mapping=aes(x=value))+
geom_density(fill=grey(0.9))+
facet_wrap(~probe+country,scales='free',ncol=4)+
geom_vline(data=subset(probes,kind=='data'),
mapping=aes(xintercept=value),
color='blue')+
labs(x="probe value",y="probability density")+
theme(strip.background=element_rect(color=NA,fill=NA),
text=element_text(size=14),
strip.text=element_text(face='plain',size=10),
panel.grid.minor=element_line(color=gray(0.9)),
axis.text.y=element_blank(),
axis.text.x=element_text(size=rel(0.6)))
juliaChild("forecasts.rds",{
set.seed(988077383L)
stopifnot(packageVersion("pomp")>="0.58-4")
starts <- c(Guinea="2014-01-05",Liberia="2014-06-01",SierraLeone="2014-06-08")
M0 <- ebolaModel(country="SierraLeone",
timestep=0.01,nstageE=3,
type="raw",na.rm=TRUE)
datmax <- max(time(M0))
### DETERMINISTIC MODELS
mles <- readRDS("tm-fit.rds")
mles <- ddply(mles,~country+type,subset,loglik==max(loglik),
select=-c(loglik,conv))
mles <- melt(mles,id=c("country","type"))
mles <- mutate(mles,value=round(value,6))
mles <- unique(mles)
mles <- dcast(mles,country+type~variable,value.var='value')
fc_tm <- foreach (mle=iter(mles,by='row'),nsamp=c(50000,10000),
.inorder=TRUE,.combine=rbind) %do%
{
theta <- unlist(subset(mle,select=-c(country,type)))
params <- parmat(theta,nsamp)
t(sobolDesign(lower=0.8*theta[c("R0","rho","k","S_0","E_0","I_0")],
upper=1.2*theta[c("R0","rho","k","S_0","E_0","I_0")],
nseq=nsamp)
) -> params[c("R0","rho","k","S_0","E_0","I_0"),]
M1 <- ebolaModel(country="SierraLeone",
timestep=0.01,nstageE=3,
type=as.character(mle$type),na.rm=TRUE)
save.seed <- .Random.seed
set.seed(923530037L,kind="L'Ecuyer-CMRG")
loglik <- foreach (k=seq_len(ncol(params)),
.inorder=TRUE,.combine=c,
.noexport=c("M0","M1"),
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=5,info=TRUE)) %dopar% {
tm <- traj.match(M1,start=params[,k])
logLik(tm)
}
weights <- exp(loglik)/sum(exp(loglik))
cat("effective sample size for type",sQuote(mle$type),
"in",mle$country,"=",1/sum(weights^2),"\n")
sample_ind <- sample.int(ncol(params),size=nsamp,replace=TRUE,prob=weights)
params <- params[,sample_ind]
sims <- foreach (k=seq_len(ncol(params)),
.inorder=FALSE,
.noexport=c("M0","M1"),
.options.multicore=list(set.seed=TRUE),
.options.mpi=list(chunkSize=5,info=TRUE)) %dopar%
{
traj <- trajectory(M0,times=1:30,params=params[,k])
dat <- rmeasure(M0,times=1:30,params=params[,k],x=traj)
data.frame(time=1:30,
incidence=traj["N_EI",,,drop=TRUE],
mean.cases=params["rho",k]*traj["N_EI",,,drop=TRUE],
cases=dat["cases",,,drop=TRUE])
}
names(sims) <- seq_along(sims)
ldply(sims,.id='rep') -> sims
.Random.seed <<- save.seed
mutate(sims,type=mle$type,country=mle$country) -> sims
sims
}
fc_tm$period <- ifelse(fc_tm$time<=datmax,"calibration","projection")
fc_tm <- subset(fc_tm,select=c(country,type,time,rep,cases,incidence,mean.cases,period))
### STOCHASTIC MODEL
nsamp <- 1e4
pars <- join(readRDS("mifs1.rds"),readRDS("profile2.rds"),type='full')
pars <- subset(pars,country=="SierraLeone"&type=="raw")
mutate(pars,weights=exp(loglik)/sum(exp(loglik))) -> pars
print(with(pars,1/sum(weights^2))) ## Effective sample size
sample_ind <- sample.int(nrow(pars),size=nsamp,replace=TRUE,prob=pars$weights)
params <- parmat(coef(M0),nrep=nsamp)
nm <- intersect(rownames(params),names(pars))
params[nm,] <- t(pars[sample_ind,nm])
pf <- pfilter(M0,params=params,save.params=TRUE,save.states=TRUE,seed=1568335316L)
t <- time(M0)
acast(melt(pf$saved.params),variable~rep~L1) -> p
acast(melt(pf$saved.states),variable~rep~L1) -> x
sim.cases <- laply(seq_along(t),function(k)rmeasure(M0,times=t[k],params=p[,,k],x=x[,,k,drop=FALSE]))
dimnames(sim.cases) <- list(time=t,observable=c("cases","deaths"),rep=NULL)
sim.cases <- melt(sim.cases[,"cases",],value.name='cases')
filt_dist <- melt(list(states=pf$saved.states,params=pf$saved.params))
filt_dist$time <- t[filt_dist$L2]
filt_dist$L1 <- NULL
filt_dist$L2 <- NULL
filt_dist <- dcast(filt_dist,time+rep~variable)
mutate(filt_dist,incidence=N_EI,mean.cases=rho*incidence) -> filt_dist
filt_dist <- join(filt_dist,sim.cases,type='full',by=c("time","rep"))
M1 <- M0
time(M1) <- seq(from=18,to=30)
timezero(M1) <- datmax
p <- p[,,datmax,drop=TRUE]
p[c("S_0","E_0","I_0","R_0"),] <- apply(x[,,datmax],2,
function(x)c(x["S"],
sum(x[c("E1","E2","E3")]),
x["I"],x["R"]))
sims <- simulate(M1,params=p,states=TRUE,obs=TRUE,seed=499643724L)
sims <- melt(sims)
sims$L1 <- NULL
dimnames(p) <- setNames(dimnames(p),c("variable","rep"))
pred_dist <- join(dcast(sims,time+rep~variable),
dcast(melt(p),rep~variable),
by='rep',type='left')
pred_dist <- mutate(pred_dist,time=timezero(M1)+time,
incidence=N_EI,mean.cases=rho*incidence)
fc_mf <- ldply(list(calibration=filt_dist,projection=pred_dist),.id='period')
fc_mf$country <- "SierraLeone"
fc_mf$type <- "raw"
fc_mf <- subset(fc_mf,select=c(country,type,time,rep,cases,incidence,mean.cases,period))
fc <- ldply(list(stoch=fc_mf,det=fc_tm),.id='model')
fc <- ddply(fc,~country,mutate,
model=factor(model,levels=c("stoch","det")),
date=as.Date(starts[unique(as.character(country))])+7*(time-1))
}) -> fc
fc <- ddply(fc,~country+type+model+time+date+period,summarize,
prob=c(0.025,0.5,0.975),
quantile=quantile(cases,probs=prob))
fc <- mutate(fc,prob=mapvalues(prob,from=c(0.025,0.5,0.975),
to=c("lower","median","upper")))
fc <- dcast(fc,time+date+period+country+type+model~prob,value.var='quantile')
source(paste0(baseurl,"data/eboladata.R"))
dat <- subset(dat,country=="SierraLeone"&is.finite(cases))
dat$period <- "calibration"
ggplot(data=subset(fc,(type=="raw"&model=="stoch")|(type=="cum"&model=="det")),
mapping=aes(x=date))+
geom_ribbon(mapping=aes(ymin=lower,ymax=upper,color=model,fill=model,
group=interaction(model,period),alpha=period))+
geom_line(size=1.3,mapping=aes(y=median,color=model,group=interaction(model,period)))+
geom_point(data=dat,mapping=aes(x=date,y=cases,shape=period),size=3)+
geom_vline(xintercept=as.numeric(max(subset(fc,period=='calibration')$date)))+
geom_text(data=data.frame(x=as.Date(c("2014-06-08","2014-10-12")),
y=2100,
label=c("Calibration","Projection")),
mapping=aes(x=x,y=y,label=label), hjust=0)+
scale_alpha_manual(values=c(calibration=0.4,projection=0.2))+
scale_fill_manual(values=c(det=alpha("red",0.4),stoch=alpha("blue",0.3)),
labels=c(stoch="stochastic, raw",
det="deterministic, cumulative"))+
scale_color_manual(values=c(det=alpha("red",0.4),stoch=alpha("blue",0.3)),
labels=c(stoch="stochastic, raw",
det="deterministic, cumulative"))+
scale_shape_manual(values=c(calibration=17,projection=19),guide=FALSE)+
labs(x="Date",y="Weekly reported incidence",fill="model, data",
color="model, data",alpha="")+
guides(alpha=FALSE)+
theme(strip.background=element_rect(color=NA,fill=NA),
text=element_text(size=15),
strip.text=element_text(face='bold'),
panel.grid.minor=element_line(color=gray(0.9)),
legend.position=c(0.3,0.5)
)
profiles <- ldply(list(det=readRDS("tm-fit.rds"),
stoch=readRDS("profile2.rds")),.id='mod')
profiles <- subset(profiles,country=="SierraLeone"&(mod=="det"|type=="raw"))
profiles <- ddply(profiles,~type+mod,mutate,dll=loglik-max(loglik))
ggplot(data=ddply(profiles,~R0+type+mod,subset,loglik==max(loglik)&dll>-10),
mapping=aes(x=R0,y=dll,group=interaction(type,mod),
color=interaction(type,mod),fill=interaction(type,mod)))+
geom_point(alpha=0.5)+geom_smooth(method='gam',formula=y~s(x),alpha=0.2)+
scale_color_manual(name="",values=c(raw.stoch='blue',
cum.det='red',cum.stoch='green',
raw.det='goldenrod'),
labels=c(raw.stoch="stochastic, raw",
cum.det="deterministic, cumulative",
raw.det="deterministic, raw",
cum.stoch="stochastic, cumulative"))+
scale_fill_manual(name="",values=c(raw.stoch='blue',cum.det='red',
cum.stoch='green',
raw.det='goldenrod'),
labels=c(raw.stoch="stochastic, raw",
cum.det="deterministic, cumulative",
raw.det="deterministic, raw",
cum.stoch="stochastic, cumulative"))+
geom_hline(yintercept=-0.5*qchisq(p=0.95,df=1))+
labs(x=expression("basic reproduction number,"~R[0]),
y="log scaled likelihood")+
expand_limits(x=c(1,2))+ylim(c(-10,1))+
theme(strip.background=element_rect(color=NA,fill=NA),
text=element_text(size=15),
strip.text=element_text(face='bold'),
panel.grid.minor=element_line(color=gray(0.9)),
legend.position=c(0.8,0.8))
fc <- readRDS("forecasts.rds")
fc <- ddply(fc,~country+type+model+time+date+period,summarize,
prob=c(0.025,0.5,0.975),
quantile=quantile(mean.cases,probs=prob))
fc <- mutate(fc,prob=mapvalues(prob,from=c(0.025,0.5,0.975),
to=c("lower","median","upper")))
fc <- dcast(fc,time+date+period+country+type+model~prob,value.var='quantile')
ggplot(data=fc,mapping=aes(x=date))+
geom_ribbon(mapping=aes(ymin=lower,ymax=upper,color=interaction(type,model),
fill=interaction(type,model),
group=interaction(type,model,period),alpha=period))+
geom_line(size=1.3,mapping=aes(y=median,color=interaction(type,model),
group=interaction(type,model,period)))+
geom_vline(xintercept=as.numeric(max(subset(fc,period=="calibration")$date)))+
geom_text(data=data.frame(x=as.Date(c("2014-06-08","2014-10-12")),
y=2100,
label=c("Calibration","Projection")),
mapping=aes(x=x,y=y,label=label), hjust=0)+
scale_alpha_manual(values=c(calibration=0.4,projection=0.2))+
scale_fill_manual(values=c(cum.det=alpha("red",0.4),
raw.stoch=alpha("blue",0.3),
raw.det=alpha("goldenrod",0.6)),
labels=c(cum.det="deterministic, cumulative",
raw.stoch="stochastic, raw",
raw.det="deterministic, raw"))+
scale_color_manual(values=c(cum.det=alpha("red",0.4),
raw.stoch=alpha("blue",0.3),
raw.det=alpha("goldenrod",0.6)),
labels=c(cum.det="deterministic, cumulative",
raw.stoch="stochastic, raw",
raw.det="deterministic, raw"))+
labs(x="Date",y="Weekly reported incidence",fill="model, data",
color="model, data",alpha="")+
guides(alpha=FALSE)+
theme_bw()+
theme(strip.background=element_rect(color=NA,fill=NA),
text=element_text(size=15),
strip.text=element_text(face='bold'),
panel.grid.minor=element_line(color=gray(0.9)),
legend.position=c(0.3,0.5)
)
fc <- readRDS("forecasts.rds")
fc <- ddply(fc,~country+type+model+time+date+period,summarize,
prob=c(0.025,0.5,0.975),
quantile=quantile(incidence,probs=prob))
fc <- mutate(fc,prob=mapvalues(prob,from=c(0.025,0.5,0.975),
to=c("lower","median","upper")))
fc <- dcast(fc,time+date+period+country+type+model~prob,value.var='quantile')
ggplot(data=fc,mapping=aes(x=date))+
geom_ribbon(mapping=aes(ymin=lower,ymax=upper,color=interaction(type,model),
fill=interaction(type,model),
group=interaction(type,model,period),alpha=period))+
geom_line(size=1.3,mapping=aes(y=median,color=interaction(type,model),
group=interaction(type,model,period)))+
geom_vline(xintercept=as.numeric(max(subset(fc,period=="calibration")$date)))+
geom_text(data=data.frame(x=as.Date(c("2014-06-08","2014-10-12")),
y=12000,
label=c("Calibration","Projection")),
mapping=aes(x=x,y=y,label=label), hjust=0)+
scale_alpha_manual(values=c(calibration=0.4,projection=0.2))+
scale_fill_manual(values=c(cum.det=alpha("red",0.4),raw.stoch=alpha("blue",0.3),
raw.det=alpha("goldenrod",0.6)),
labels=c(cum.det="deterministic, cumulative",
raw.stoch="stochastic, raw",
raw.det="deterministic, raw"))+
scale_color_manual(values=c(cum.det=alpha("red",0.4),
raw.stoch=alpha("blue",0.3),
raw.det=alpha("goldenrod",0.6)),
labels=c(cum.det="deterministic, cumulative",
raw.stoch="stochastic, raw",
raw.det="deterministic, raw"))+
labs(x="Date",y="True weekly incidence",fill="model, data",
color="model, data",alpha="")+
guides(alpha=FALSE)+
theme_bw()+
theme(strip.background=element_rect(color=NA,fill=NA),
text=element_text(size=15),
strip.text=element_text(face='bold'),
panel.grid.minor=element_line(color=gray(0.9)),
legend.position=c(0.3,0.5)
)