I have data that is a 50:50 mix of a normal distribution and a constant value:
numdata = 10000
data = np.random.normal(0.0,1.0,numdata).astype(np.float32)
data[int(numdata/2):] = 0.0
plt.hist(data,30,density=True)
My task is to fit a mixture density to that data. I'm using a tfd.Mixture of tfd.Normal and tfd.Deterministic. The known ratio of Normal to Deterministic in the sample data is 0.5, but my MCMC instead returns a ratio of about 0.83 in favor of the Normal component.
Is there a better way to fit this distribution with the correct ratio?
Here is a complete sample code:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import tensorflow as tf
import tensorflow_probability as tfp
import matplotlib.pyplot as plt
tfd = tfp.distributions
tfb = tfp.bijectors
import numpy as np
from time import time
numdata = 10000
data = np.random.normal(0.0,1.0,numdata).astype(np.float32)
data[int(numdata/2):] = 0.0
_=plt.hist(data,30,density=True)
root = tfd.JointDistributionCoroutine.Root
def dist_fn(rv_p, rv_mu):
    # Mixture of a Normal(mu, 1) and a point mass at zero, weighted by p.
    rv_cat = tfd.Categorical(probs=tf.stack([rv_p, 1.-rv_p], -1))
    rv_norm = tfd.Normal(rv_mu, 1.0)
    rv_zero = tfd.Deterministic(tf.zeros_like(rv_mu))
    rv_mix = tfd.Independent(
        tfd.Mixture(cat=rv_cat,
                    components=[rv_norm, rv_zero]),
        reinterpreted_batch_ndims=1)
    return rv_mix

def model_fn():
    # Priors on the mixture weight p and the Normal location mu.
    rv_p = yield root(tfd.Sample(tfd.Uniform(0.0, 1.0), 1))
    rv_mu = yield root(tfd.Sample(tfd.Uniform(-1., 1.), 1))
    rv_mix = yield dist_fn(rv_p, rv_mu)
jd = tfd.JointDistributionCoroutine(model_fn)
unnormalized_posterior_log_prob = lambda *args: jd.log_prob(args + (data,))
n_chains = 1
p_init = [0.3]
p_init = tf.cast(p_init,dtype=tf.float32)
mu_init = 0.1
mu_init = tf.stack([mu_init]*n_chains,axis=0)
initial_chain_state = [
    p_init,
    mu_init,
]
bijectors = [
    tfb.Sigmoid(),   # p lives in (0, 1)
    tfb.Identity(),  # mu is unconstrained
]
step_size = 0.01
num_results = 50000
num_burnin_steps = 50000
kernel = tfp.mcmc.TransformedTransitionKernel(
    inner_kernel=tfp.mcmc.HamiltonianMonteCarlo(
        target_log_prob_fn=unnormalized_posterior_log_prob,
        num_leapfrog_steps=2,
        step_size=step_size,
        state_gradients_are_stopped=True),
    bijector=bijectors)
kernel = tfp.mcmc.SimpleStepSizeAdaptation(
    inner_kernel=kernel, num_adaptation_steps=int(num_burnin_steps * 0.8))
# XLA optimization
@tf.function(autograph=False, experimental_compile=True)
def graph_sample_chain(*args, **kwargs):
    return tfp.mcmc.sample_chain(*args, **kwargs)
st = time()
trace, stats = graph_sample_chain(
    num_results=num_results,
    num_burnin_steps=num_burnin_steps,
    current_state=initial_chain_state,
    kernel=kernel)
et = time()
print(et-st)
ptrace, mutrace = trace
plt.subplot(121)
_=plt.hist(ptrace.numpy(),100,density=True)
plt.subplot(122)
_=plt.hist(mutrace.numpy(),100,density=True)
print(np.mean(ptrace),np.mean(mutrace))
The resulting distributions of p and mu are like this:
Obviously, the mean of p should be 0.5. I suspect there might be something wrong with model_fn(). I tried evaluating the log_prob of the model at different p values, and indeed the "optimal" value is around 0.83. I just don't understand why, or how to fix it so that the original mixture is reconstructed.
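A check along those lines can be done with something like the following (a minimal sketch reusing jd and data from the code above; mu is simply held fixed at 0.0 here, which is only a convenient guess):
# Scan the joint log-prob over a grid of p values, holding mu fixed at 0.0.
p_grid = np.linspace(0.01, 0.99, 99, dtype=np.float32)
lp = np.array([jd.log_prob((np.array([p], np.float32),    # rv_p, shape [1]
                            np.array([0.0], np.float32),  # rv_mu, shape [1]
                            data)).numpy()
               for p in p_grid])
print(p_grid[lp.argmax()])  # lands around 0.83, not 0.5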
[EDIT] A "simpler" demo code with pymc3. Still the same behavior, result is 0.83 instead of 0.5
import pymc3 as pm
import numpy as np
import arviz as az
import matplotlib.pyplot as plt
numdata = 1000
data1 = np.random.normal(0.0,1.0,numdata).astype(np.float32)
data2 = np.zeros(numdata).astype(np.float32)
data = np.concatenate((data1,data2))
_=plt.hist(data,30,density=True)
with pm.Model() as model:
    norm = pm.Normal.dist(0.0, 1.0)
    zero = pm.Constant.dist(0.0)
    components = [norm, zero]
    w = pm.Dirichlet('p', a=np.array([1, 1]))  # two mixture component weights
    like = pm.Mixture('data', w=w, comp_dists=components, observed=data)
    posterior = pm.sample()
idata = az.from_pymc3(posterior)
az.plot_posterior(posterior)
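The weight can also be read off numerically rather than from the plot (one extra line, assuming the idata object built above):
print(az.summary(idata, var_names=["p"]))  # mean of p[0] comes out around 0.83 rather than 0.5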