import os
"KERAS_BACKEND"] = "tensorflow"
os.environ[import keras
import bayesflow as bf
import numpy as np
import matplotlib.pyplot as plt
Estimating parameters of a normal distribution
Simple example
Simulator
def context(batch_size, n=None):
    if n is None:
        n = np.random.randint(10, 101)
    return dict(n=n)

def prior(mu=None, sigma=None):
    if mu is None:
        mu = np.random.normal()
    if sigma is None:
        sigma = np.random.gamma(shape=2)
    return dict(mu=mu, sigma=sigma)

def likelihood(n, mu, sigma):
    y = np.random.normal(mu, sigma, size=n)
    return dict(y=y)

def summary(y):
    mean = np.mean(y)
    sd = np.std(y, ddof=1)
    return dict(mean=mean, sd=sd)

simulator = bf.make_simulator([prior, likelihood, summary], meta_fn=context)
Prior predictives
data = simulator.sample(1000)
fig = bf.diagnostics.pairs_samples(data, variable_keys=["mean", "sd"])
Approximator
adapter = (bf.Adapter()
    .broadcast("n", to="mean")
    .constrain("sigma", lower=0)
    .concatenate(["n", "mean", "sd"], into="inference_conditions")
    .concatenate(["mu", "sigma"], into="inference_variables")
    .drop("y")
)
adapter(data)
{'inference_conditions': array([[73. , -0.3101565 , 1.85986046],
[73. , -0.3378794 , 4.43681815],
[73. , 0.66380707, 3.48889386],
...,
[73. , 1.00937811, 0.40078604],
[73. , 0.63226432, 0.55506636],
[73. , -1.84457355, 0.95182243]]),
'inference_variables': array([[-0.25619292, 2.00083979],
[ 0.0145662 , 4.08056985],
[ 0.36442674, 3.1198835 ],
...,
[ 0.91677106, -0.7230314 ],
[ 0.68635459, -0.42010279],
[-1.86425704, 0.65961324]])}
approximator = bf.approximators.ContinuousApproximator(
    inference_network=bf.networks.CouplingFlow(permutation="swap", subnet_kwargs=dict(dropout=False)),
    adapter=adapter
)

epochs = 10
num_batches = 100
batch_size = 256

schedule = keras.optimizers.schedules.CosineDecay(initial_learning_rate=1e-3, decay_steps=epochs * num_batches)
optimizer = keras.optimizers.Adam(learning_rate=schedule)
approximator.compile(optimizer)

history = approximator.fit(
    epochs=epochs,
    num_batches=num_batches,
    batch_size=batch_size,
    simulator=simulator
)
INFO:bayesflow:Building dataset from simulator instance of SequentialSimulator.
INFO:bayesflow:Using 10 data loading workers.
INFO:bayesflow:Building on a test batch.
Epoch 1/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 13ms/step - loss: 3.1127 - loss/inference_loss: 3.1127
Epoch 2/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 2.1562 - loss/inference_loss: 2.1562
Epoch 3/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 1.6789 - loss/inference_loss: 1.6789
Epoch 4/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 0.9218 - loss/inference_loss: 0.9218
Epoch 5/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 1.0360 - loss/inference_loss: 1.0360
Epoch 6/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.4432 - loss/inference_loss: 0.4432
Epoch 7/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - loss: 0.3578 - loss/inference_loss: 0.3578
Epoch 8/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - loss: 0.3496 - loss/inference_loss: 0.3496
Epoch 9/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - loss: 0.1186 - loss/inference_loss: 0.1186
Epoch 10/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 2s 17ms/step - loss: 0.0109 - loss/inference_loss: 0.0109
fig = bf.diagnostics.plots.loss(history)
Validation
test_data = simulator.sample(1000, n=50)
prior = dict(mu=test_data["mu"], sigma=test_data["sigma"])
posterior = approximator.sample(num_samples=500, conditions=test_data)

fig = bf.diagnostics.plots.calibration_ecdf(estimates=posterior, targets=prior)
fig = bf.diagnostics.z_score_contraction(estimates=posterior, targets=prior)
fig = bf.diagnostics.plots.recovery(estimates=posterior, targets=prior)
Inference
inference_data = dict(n=50, mean=np.array([[0.5]]), sd=np.array([[2]]))
posterior = approximator.sample(num_samples=1000, conditions=inference_data)
fig = bf.diagnostics.pairs_posterior(estimates=posterior, priors=prior)
Using a summary network
Approximator
adapter = (bf.Adapter()
    .broadcast("n", to="y")
    .as_set("y")
    .constrain("sigma", lower=0)
    .rename("n", "inference_conditions")
    .rename("y", "summary_variables")
    .concatenate(["mu", "sigma"], into="inference_variables")
    .drop(["mean", "sd"])
)
workflow = bf.BasicWorkflow(
    inference_network=bf.networks.CouplingFlow(subnet_kwargs=dict(dropout=False)),
    summary_network=bf.networks.DeepSet(
        base_distribution="normal",
        dropout=False,
        mlp_widths_equivariant=(16, 16),
        mlp_widths_invariant_inner=(16, 16),
        mlp_widths_invariant_outer=(16, 16),
        mlp_widths_invariant_last=(16, 16)
    ),
    simulator=simulator,
    adapter=adapter,
    inference_variables=["mu", "sigma"],
    inference_conditions="n",
    summary_variables="y"
)

history = workflow.fit_online(epochs=epochs, num_batches_per_epoch=num_batches, batch_size=batch_size)
INFO:bayesflow:Fitting on dataset instance of OnlineDataset.
INFO:bayesflow:Building on a test batch.
Epoch 1/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 12s 37ms/step - loss: 3.3305 - loss/inference_loss: 2.8199 - loss/summary_loss: 0.5106
Epoch 2/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 5s 50ms/step - loss: 1.7787 - loss/inference_loss: 1.4463 - loss/summary_loss: 0.3324
Epoch 3/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 5s 51ms/step - loss: 1.2010 - loss/inference_loss: 0.9137 - loss/summary_loss: 0.2873
Epoch 4/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 5s 54ms/step - loss: 0.8284 - loss/inference_loss: 0.5727 - loss/summary_loss: 0.2557
Epoch 5/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 57ms/step - loss: 0.5715 - loss/inference_loss: 0.3398 - loss/summary_loss: 0.2317
Epoch 6/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 55ms/step - loss: 0.4893 - loss/inference_loss: 0.2788 - loss/summary_loss: 0.2105
Epoch 7/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 55ms/step - loss: 0.4544 - loss/inference_loss: 0.2548 - loss/summary_loss: 0.1996
Epoch 8/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 58ms/step - loss: 0.3392 - loss/inference_loss: 0.1429 - loss/summary_loss: 0.1962
Epoch 9/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 59ms/step - loss: 0.3685 - loss/inference_loss: 0.1744 - loss/summary_loss: 0.1941
Epoch 10/10
100/100 ━━━━━━━━━━━━━━━━━━━━ 6s 60ms/step - loss: 0.3299 - loss/inference_loss: 0.1355 - loss/summary_loss: 0.1945
Validation
test_data = simulator.sample(1000, n=50)
plots = workflow.plot_default_diagnostics(test_data=test_data)
Inference
inference_data = dict(
    y=np.random.normal(loc=1, scale=0.6, size=(1, 50)),
    n=50
)

num_samples = 2_000

posterior = workflow.sample(num_samples=num_samples, conditions=inference_data)
posterior = keras.tree.map_structure(np.squeeze, posterior)
fig = bf.diagnostics.pairs_posterior(posterior)

posterior_predictives = simulator.sample(num_samples, n=50, **posterior)
summary(inference_data["y"])
{'mean': np.float64(1.0854894225709761), 'sd': np.float64(0.5950735427958067)}
fig = bf.diagnostics.pairs_samples(posterior_predictives, variable_keys=["mean", "sd"])

fig = plt.violinplot(posterior_predictives["y"], showmeans=True, side="low")
fig = plt.scatter(x=[i+1 for i in range(inference_data["n"])], y=inference_data["y"], c="black", zorder=100)
summaries_null = workflow.summary_network(workflow.simulate_adapted(1000)['summary_variables'])
summaries_ref = workflow.summary_network(workflow.simulate_adapted(500)['summary_variables'], training=False)

mmd_null = [
    bf.metrics.functional.maximum_mean_discrepancy(summaries_null, summaries_ref[i:i+1]).numpy() for i in range(500)
]

summaries_obs = workflow.summary_network(adapter(inference_data, strict=False)["summary_variables"])
mmd_obs = bf.metrics.functional.maximum_mean_discrepancy(summaries_null, summaries_obs)

fig = bf.diagnostics.plots.mmd_hypothesis_test(mmd_null, mmd_obs)
Further exercises
- Train a BayesFlow model that estimates the mean vector and the variance-covariance matrix of a 2D Gaussian.
- Train a BayesFlow model that estimates the parameters of any other distribution of your choice (some examples: Gamma, Negative Binomial, Weibull, …).
- The implementation of the normal model imposes one particular configuration of the priors on the parameters "mu" and "sigma". This is relatively impractical, because such priors will not always be reasonable. It is also fairly common to fit a model under several different priors to investigate how robust your inferences are to the prior specification. For this it is handy to train a single BayesFlow model that can make inferences under different prior specifications, which you can achieve by varying the prior specification during simulation. For example,

hyper_mu_mu = np.random.uniform(-100, 100)
mu = np.random.normal(hyper_mu_mu)

would generate a different prior for mu in each simulation, depending on the value of the hyperparameter \(\mu_\mu\). If you condition the network on the values of the hyperparameters, you can train it to make inferences under different priors. Try to implement such a network; a minimal sketch of the required simulator and adapter changes is given after this list.
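As a starting point, here is a minimal sketch of how the hyperprior idea could be wired into the existing simulator and adapter. The function and variable names (hyper_prior, hyper_mu_mu) are illustrative assumptions rather than part of the original model; the pattern that matters is drawing the hyperparameter, passing it to the prior, and adding it to the inference conditions.

# Hypothetical hyperprior: draws a new prior location for mu in every simulation
def hyper_prior():
    hyper_mu_mu = np.random.uniform(-100, 100)
    return dict(hyper_mu_mu=hyper_mu_mu)

# The prior now depends on the hyperparameter produced by hyper_prior
def prior(hyper_mu_mu, mu=None, sigma=None):
    if mu is None:
        mu = np.random.normal(hyper_mu_mu)
    if sigma is None:
        sigma = np.random.gamma(shape=2)
    return dict(mu=mu, sigma=sigma)

simulator = bf.make_simulator([hyper_prior, prior, likelihood, summary], meta_fn=context)

# Expose the hyperparameter to the inference network by including it in the conditions
adapter = (bf.Adapter()
    .broadcast("n", to="mean")
    .constrain("sigma", lower=0)
    .concatenate(["n", "hyper_mu_mu", "mean", "sd"], into="inference_conditions")
    .concatenate(["mu", "sigma"], into="inference_variables")
    .drop("y")
)

At inference time you would then supply the hyperparameter values describing your chosen prior as part of the conditions, alongside n, mean, and sd.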