library(mixtools)
We consider a collection of random variables \((X_1, \dots, X_n)\) associated with \(n\) individuals drawn from \(Q\) populations. The label of each individual describes the population (or class) to which it belongs and is unobserved. The \(Q\) classes have a priori distribution \({\boldsymbol \alpha} = (\alpha_1,\dots,\alpha_Q)\) with \(\alpha_q = \mathbb{P}(Z_i = q)\). In other word, the latent variable \(Z_i \in \{1,..,Q\}\) indicating the label follow a multinomial distribution \(Z_i \sim \mathcal{M}(1,\boldsymbol\alpha)\), such as \(\sum_{q=1}^Q \alpha_q = 1\).
The distribution of \(X_i\) conditional on the label of \(i\) is assumed to be a univariate gaussian distribution with unknown parameters, that is, \(X_i | Z_i = q \sim \mathcal{N}(\mu_q,\sigma^2_q)\).
We further introduce the additional notation \(Z_{iq} = \mathbb 1_{\{Z_i = q\}}\) and \(\tau_{iq}= \mathbb{P}(Z_{iq}=1|X_i)\) for posterior probabilities.
We denote the vector of parameters to be estimated by \(\mathbf{\mu} = (\mu_1,\dots,\mu_Q)\), \(\mathbf{\sigma^2} = (\sigma^2_1,\dots,\sigma^2_Q)\), \(\boldsymbol\tau = (\tau_{iq, i=1,\dots,n; q=1,\dots Q})\).
Likelihood. Write the model complete-data loglikelihood.
E-step. For fixed values of \(\hat{\mu}_q, \hat{\sigma}_{q}^{2}\) and \(\hat\alpha_q\), give the expression of the estimates of the posterior probabilities \(\tau_{iq}= \mathbb{P}(Z_{iq}=1|X_i)\).
M-step. For fixed values of \(\hat{\tau}_{iq}\), show that the maximization step leads to the following estimator for the model parameters: \[ \hat{\alpha}_q = \frac 1n \sum_{i=1}^n \hat{\tau}_{iq}, \:\:\:\:\: \hat{\mu}_q = \frac{\sum_i \hat{\tau}_{iq} x_i}{\sum_i \hat{\tau}_{iq}}, \:\:\:\:\: \hat{\sigma}_q^2 = \frac{\sum_i \hat{\tau}_{iq} (x_i - \hat{\mu}_q)^2}{\sum_i \hat{\tau}_{iq}}. \]
Implementation. Complete the following code.
<- function(X, Z, theta) {
get_cloglik #returns the complete model loglikelihood
<- length(X); Q <- ncol(Z)
n <- theta$alpha; mu <- theta$mu; sigma <- theta$sigma
alpha <- # to write
Xs <- # to write
res
res
}
<- function(X, tau) {
M_step #maximization step
<- length(X); Q <- ncol(tau)
n <- # to write
alpha <- # to write
mu <- # to write
sigma list(alpha = alpha, mu = mu, sigma = sigma)
}
<- function(X, theta) {
E_step #expectation step
<- # to write
probs <- rowSums(probs)
likelihoods list(tau = probs / likelihoods, loglik = sum(log(likelihoods)))
}
<- function(X, Q,
EM_mixture init.cl = base::sample(1:Q, n, rep=TRUE), max.iter=100, eps=1e-5) {
<- length(X); tau <- matrix(0, n, Q); tau[cbind(1:n, init.cl)] <- 1
n <- vector("numeric", max.iter)
loglik <- vector("numeric", max.iter)
Eloglik <- 0; cond <- FALSE
iter
while (!cond) {
<- iter + 1
iter ## M step
<- # to write
theta ## E step
<- # to write
res_Estep <- # to write
tau ## check consistency
<- # to write
loglik[iter] <- # to write
Eloglik[iter] if (iter > 1)
<- (iter>=max.iter) | Eloglik[iter]-Eloglik[iter-1] < eps
cond
}
<- list(alpha = theta$alpha, mu = theta$mu, sigma = theta$sigma,
res tau = tau, cl = apply(tau, 1, which.max),
Eloglik = Eloglik[1:iter],
loglik = loglik[1:iter])
res }
<- 5 ; sigma1 <- 1; n1 <- 100
mu1 <- 10 ; sigma2 <- 1; n2 <- 200
mu2 <- 15 ; sigma3 <- 2; n3 <- 50
mu3 <- 20 ; sigma4 <- 3; n4 <- 100
mu4 <- rep(1:4,c(n1,n2,n3,n4))
cl <- c(rnorm(n1,mu1,sigma1),rnorm(n2,mu2,sigma2),
x rnorm(n3,mu3,sigma3),rnorm(n4,mu4,sigma4))
<- length(x)
n
## we randomize the class ordering
<- base::sample(1:n)
rnd <- cl[rnd]
cl <- x[rnd]
x
<- c(n1,n2,n3,n4)/n
alpha
curve(alpha[1]*dnorm(x,mu1,sigma1) +
2]*dnorm(x,mu2,sigma2) +
alpha[3]*dnorm(x,mu3,sigma3) +
alpha[4]*dnorm(x,mu4,sigma3),
alpha[col="blue", lty=1, from=0,to=30, n=1000,
main="Theoretical Gaussian mixture and its components",
xlab="x", ylab="density")
curve(alpha[1]*dnorm(x,mu1,sigma1), col="red", add=TRUE, lty=2)
curve(alpha[2]*dnorm(x,mu2,sigma2), col="red", add=TRUE, lty=2)
curve(alpha[3]*dnorm(x,mu3,sigma3), col="red", add=TRUE, lty=2)
curve(alpha[4]*dnorm(x,mu4,sigma4), col="red", add=TRUE, lty=2)
rug(x)
Try your EM algorithm on the simulated data. Comment the results. Consider different initialization. Compare with the output of the function normalmixEm in the package mixtools.