# Load the coal-mining disaster data (one disaster count per year, 1851-1962).
coal <- read.table("coal.dat", header = TRUE)
# plot(coal$disasters, type = "l")
ind <- seq_along(coal$year)  # time index 1..TT (seq_along avoids the 1:length(x) footgun)
# Priors
TT <- length(coal$year)  # number of time points
set.seed(1234)
theta0 <- sample(seq_len(TT), 1)  # change point: draw from discrete uniform on {1, ..., TT}
alpha0 <- rgamma(1, shape = 10, rate = 10)  # alpha ~ Gamma(10, 10)
# lambda_i | alpha ~ Gamma(3, alpha); both chains start from the same draw.
lambda1_0 <- lambda2_0 <- rgamma(1, shape = 3, rate = alpha0)
# Gibbs sampler configuration and preallocated storage
MC <- 10000     # total number of iterations
burn <- 1:3000  # burn-in indices, dropped before summarizing
gibbstheta <- numeric(MC)                        # sampled change points
gibbstheta.prob <- array(0, dim = c(TT, 1, MC))  # prob theta = i at each iteration, i = 1 to TT
gibbsalpha <- numeric(MC)                        # sampled alpha values
gibbslambda1 <- numeric(MC)                      # sampled pre-change rates
gibbslambda2 <- numeric(MC)                      # sampled post-change rates
# Initialize every chain from the prior draws above.
gibbstheta[1] <- theta0
gibbstheta.prob[, , 1] <- rep(1 / TT, TT)
gibbsalpha[1] <- alpha0
gibbslambda1[1] <- lambda1_0
gibbslambda2[1] <- lambda2_0
# MCMC loop: one Gibbs sweep per iteration, sampling each full conditional in turn.
for (it in 2:MC) {
  # alpha | ... ~ Gamma(16, lambda1 + lambda2 + 10)
  gibbsalpha[it] <- rgamma(1, shape = 16,
                           rate = gibbslambda1[it - 1] + gibbslambda2[it - 1] + 10)
  # lambda1 | ... and lambda2 | ...: conjugate Gamma updates, splitting the data
  # at the current change point theta^(it-1).
  before <- ind <= gibbstheta[it - 1]
  gibbslambda1[it] <- rgamma(1, shape = 3 + sum(coal$disasters[before]),
                             rate = sum(before) + gibbsalpha[it])
  gibbslambda2[it] <- rgamma(1, shape = 3 + sum(coal$disasters[!before]),
                             rate = sum(!before) + gibbsalpha[it])
  # theta | ...: discrete pmf over {1, ..., TT}. Work on the log scale -- the
  # raw product of 112 Poisson pmfs underflows to 0 in double precision, which
  # corrupts the normalization passed to sample().
  lp1 <- dpois(coal$disasters, gibbslambda1[it], log = TRUE)
  lp2 <- dpois(coal$disasters, gibbslambda2[it], log = TRUE)
  # log p(theta = j) = sum_{i <= j} lp1[i] + sum_{i > j} lp2[i]  (+ constant)
  logp <- cumsum(lp1) + sum(lp2) - cumsum(lp2)
  w <- exp(logp - max(logp))  # subtract the max before exponentiating (stability)
  gibbstheta.prob[, , it] <- w / sum(w)  # store the normalized pmf
  # draw theta from its full conditional pmf
  gibbstheta[it] <- sample(seq_len(TT), 1, prob = gibbstheta.prob[, , it])
}
MCMC II
Example: a Bayesian Change-Point Problem with Poisson Counts
The goal of this problem is to estimate the posterior distribution of the model parameters via a Gibbs sampler.
Data on Coal-Mining Disasters
Figure below shows some data on the number of coal-mining disasters per year between 1851 and 1962. The rate of accidents per year appears to decrease around 1900, so we consider a change-point model for these data.
Model and Assumptions
Let \(X_{j}\) denote the number of coal-mining disasters in year \(j\), for the 112 years from 1851 to 1962. The change-point model is \[ X_{j} \sim\left\{\begin{array}{ll} \operatorname{Poisson}\left(\lambda_{1}\right), & j=1, \ldots, \theta \\ \operatorname{Poisson}\left(\lambda_{2}\right), & j=\theta+1, \ldots, 112 \end{array}\right. \] \(\lambda_{i} | \alpha \sim \operatorname{Gamma}(3, \alpha)\) for \(i=1,2\), where \(\alpha \sim \operatorname{Gamma}(10, 10)\) and \(\theta\) follows a discrete uniform distribution over \(\{1, \ldots, 112\}\).
The goal of this problem is to estimate the posterior distribution of the model parameters via a Gibbs sampler.
Posterior Distributions
The joint posterior distribution of parameter \(\lambda\_1, \lambda\_2, \alpha, \theta\) is \[ \begin{align*} p(\lambda_1, \lambda_2, \alpha, \theta|\mathbf{x}_{1:T}) &\propto p(\lambda_1, \lambda_2, \alpha, \theta, \mathbf{x}_{1:T})\\ &\propto f( \mathbf{x}_{1:T}| \lambda_1, \lambda_2, \alpha, \theta) p( \lambda_1, \lambda_2, \alpha, \theta)\\ & \propto f( \mathbf{x}_{1:T}| \lambda_1, \lambda_2, \alpha, \theta) p( \lambda_1|\alpha) p(\lambda_2|\alpha) p(\alpha) p(\theta)\\ & \propto \left(\prod_{1\leq j \leq \theta} \operatorname{Poisson}(x_j|\lambda_1, \alpha, \theta) \right) \left( \prod_{\theta +1 \leq j \leq 112} \operatorname{Poisson}(x_j|\lambda_2, \alpha, \theta) \right) \\ &\quad \operatorname{Gamma}(\lambda_1|\alpha)\operatorname{Gamma}(\lambda_2|\alpha) \operatorname{Gamma}(\alpha) \operatorname{Uniform}(\theta|\{1,\dots,112\})\\ & \propto \left(\prod_{1\leq j \leq \theta} \frac{\lambda^{x_j}_1}{x_j !}\exp{\{-\lambda_1\}}\right) \left(\prod_{\theta +1 \leq j \leq 112} \frac{\lambda^{x_j}_2}{x_j !}\exp{\{-\lambda_2\}}\right) \\ &\quad \frac{\alpha^3 \lambda^{3-1}_1}{\Gamma(3)} \exp{\{-\alpha \lambda_1\}} \frac{\alpha^3 \lambda^{3-1}_2}{\Gamma(3)} \exp{\{-\alpha \lambda_2\}} \frac{10^{10} \alpha^{10-1}}{\Gamma(10)} \exp{\{-10\alpha\}} \frac{1}{112} \end{align*} \] The full conditional posterior distributions are as following: \[ \begin{align*} p(\alpha|\lambda_1,\lambda_2, \theta,\mathbf{x}_{1:T}) \propto c \cdot \alpha^{16-1} \exp{\{-(\lambda_1+\lambda_2+10)\alpha\}}, \end{align*} \] then \[\alpha|\dots \sim \operatorname{Gamma}\left(16,\lambda_1+\lambda_2+10 \right),\] and \[ \begin{align*} p(\lambda_1|\lambda_2, \alpha, \theta, \mathbf{x}_{1:T}) &\propto \frac{\alpha^3 \lambda^{3-1}_1}{\Gamma(3)} \exp{\{-\alpha \lambda_1\}} \left(\prod_{1\leq j \leq \theta} \frac{\lambda^{x_j}_1}{x_j !}\exp{\{-\lambda_1\}}\right)\\ &\propto c_1 \cdot \lambda^{3+(\sum_{1\leq j \leq \theta}{x_j})-1}_1 \exp \left\{-\left(\alpha+\sum \mathbf{1}_{\{1 \leq j \leq 
\theta\}}\right) \lambda_{1}\right\} \end{align*} \]
so, \[\lambda_1|\dots \sim \operatorname{Gamma}\left(3+\sum_{1\leq j \leq \theta}{x_j}, \alpha+\sum \mathbf{1}_{\{1 \leq j \leq \theta\}} \right)\]
similarly, \[ \begin{align*} p(\lambda_2|\lambda_1, \alpha, \theta, \mathbf{x}_{1:T}) &\propto \frac{\alpha^3 \lambda^{3-1}_2}{\Gamma(3)} \exp{\{-\alpha \lambda_2\}} \left(\prod_{\theta+1\leq j \leq 112} \frac{\lambda^{x_j}_2}{x_j !}\exp{\{-\lambda_2\}}\right)\\ &\propto c_2 \cdot \lambda^{3+(\sum_{\theta+1\leq j \leq 112}{x_j})-1}_2 \exp \left\{-\left(\alpha+\sum \mathbf{1}_{\{\theta+1\leq j \leq 112\}}\right) \lambda_{2}\right\} \end{align*} \] so, \[\lambda_2|\dots \sim \operatorname{Gamma}\left(3+\sum_{\theta+1\leq j \leq 112}{x_j}, \ \alpha+\sum \mathbf{1}_{\{\theta+1\leq j \leq 112\}} \right)\] and
\[ \begin{align*} p(\theta|\lambda_1, \lambda_2, \alpha, \mathbf{x}_{1:T}) \propto \left(\prod_{1\leq j \leq \theta} \frac{\lambda^{x_j}_1}{x_j !}\exp{\{-\lambda_1\}}\right) \left(\prod_{\theta +1 \leq j \leq 112} \frac{\lambda^{x_j}_2}{x_j !}\exp{\{-\lambda_2\}}\right) \frac{1}{112} \end{align*} \] i.e., for \(\theta_i = 1,\dots, 112\),
\[ p(\theta = \theta_i|\lambda_1, \lambda_2, \alpha, \mathbf{x}_{1:T}) \propto \left(\prod_{1\leq j \leq \theta_i} \frac{\lambda^{x_j}_1}{x_j !}\exp{\{-\lambda_1\}}\right) \left(\prod_{\theta_i +1 \leq j \leq 112} \frac{\lambda^{x_j}_2}{x_j !}\exp{\{-\lambda_2\}}\right) \frac{1}{112} \]
Gibbs Sampling Procedure
At the starting point of iterations, \(it=0\), draw the starting values \(\alpha^{(0)}, \lambda^{(0)}_1, \lambda^{(0)}_2, \theta^{(0)}\) from the priors.
For \(it= 1, 2, \dots\), generate in turn until convergence, \[ \begin{align*} &\alpha^{(it)}|\dots \sim \operatorname{Gamma}\left(16,\lambda^{(it-1)}_1+\lambda^{(it-1)}_2+10 \right),\\ &\lambda^{(it)}_1|\dots \sim \operatorname{Gamma}\left(3+\sum_{1\leq j \leq \theta^{(it-1)}}{x_j}, \alpha^{(it)}+\sum \mathbf{1}_{\left\{1 \leq j \leq \theta^{(it-1)}\right\}} \right),\\ &\lambda^{(it)}_2|\dots \sim \operatorname{Gamma}\left(3+\sum_{\theta^{(it-1)}+1\leq j \leq 112}{x_j}, \ \alpha^{(it)}+\sum \mathbf{1}_{\left\{\theta^{(it-1)}+1\leq j \leq 112\right\}} \right),\\ &\theta^{(it)}|\dots \text{drawn from } \{1, \ldots, 112\} \text{with pmf }p(\theta^{(it)} = \theta_i|\lambda^{(it)}_1, \lambda^{(it)}_2, \alpha^{(it)}, \mathbf{x}_{1:T}), \end{align*} \] where \[p(\theta^{(it)} = \theta_i|\lambda^{(it)}_1, \lambda^{(it)}_2, \alpha^{(it)}, \mathbf{x}_{1:T}) \propto \left(\prod_{1\leq j \leq \theta_i} \frac{{\lambda^{(it)}_1}^{x_j}}{x_j !}\exp{\{-\lambda^{(it)}_1\}}\right) \left(\prod_{\theta_i +1 \leq j \leq 112} \frac{{\lambda^{(it)}_2}^{x_j}}{x_j !}\exp{\{-\lambda^{(it)}_2\}}\right) \frac{1}{112}\] and the probability weight for \(\theta_i\) being sampled as \(\theta^{(it)}\) is \[\frac{ \left(\prod_{1\leq j \leq \theta_i} \frac{{\lambda^{(it)}_1}^{x_j}}{x_j !}\exp{\{-\lambda^{(it)}_1\}}\right) \left(\prod_{\theta_i +1 \leq j \leq 112} \frac{{\lambda^{(it)}_2}^{x_j}}{x_j !}\exp{\{-\lambda^{(it)}_2\}}\right) }{\sum_{\theta_i = 1}^{112} \left[ \left(\prod_{1\leq j \leq \theta_i} \frac{{\lambda^{(it)}_1}^{x_j}}{x_j !}\exp{\{-\lambda^{(it)}_1\}}\right) \left(\prod_{\theta_i +1 \leq j \leq 112} \frac{{\lambda^{(it)}_2}^{x_j}}{x_j !}\exp{\{-\lambda^{(it)}_2\}}\right) \right]}\]
# Posterior point estimates (burn-in discarded). The console output that was
# pasted onto these lines broke the script; it is kept here as comments.
1851 + mean(gibbstheta[-burn]) - 1  # estimated change year: [1] 1889.836
mean(gibbslambda1[-burn])  # mean number of disasters before the change: [1] 3.112568
mean(gibbslambda2[-burn])  # mean number of disasters after the change: [1] 0.9507092
# Plot of the raw data
plot(coal$year, coal$disasters, type = "l")
plot(coal$disasters, type = "l")
# Trace plots (post burn-in) to inspect mixing
plot(gibbsalpha[-burn], type = "l")
plot(gibbslambda1[-burn], type = "l")
plot(gibbslambda2[-burn], type = "l")
plot(gibbstheta[-burn], type = "l")
# Autocorrelation functions of each chain
acf(gibbsalpha[-burn])
acf(gibbslambda1[-burn])
acf(gibbslambda2[-burn])
acf(gibbstheta[-burn])
# Histograms of the marginal posteriors
hist(gibbsalpha[-burn])
hist(gibbslambda1[-burn])
hist(gibbslambda2[-burn])
hist(gibbstheta[-burn])
# Numeric summaries of the posterior draws. The console output that was pasted
# inline (making the script unparseable) is preserved below as comments.
summary(gibbsalpha[-burn])
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 0.3280  0.9358  1.1159  1.1394  1.3143  2.3416
summary(gibbslambda1[-burn])
#  Min. 1st Qu. Median   Mean 3rd Qu.   Max.
# 2.179   2.914  3.104  3.113   3.300  4.236
summary(gibbslambda2[-burn])
#   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
# 0.5736  0.8690  0.9461  0.9507  1.0276  1.4241
summary(gibbstheta[-burn])
#  Min. 1st Qu. Median   Mean 3rd Qu.   Max.
# 32.00   38.00  40.00  39.84   41.00  50.00