diff --git a/src/qinfer/abstract_model.py b/src/qinfer/abstract_model.py index cc0db68..922dea4 100644 --- a/src/qinfer/abstract_model.py +++ b/src/qinfer/abstract_model.py @@ -288,6 +288,81 @@ class DifferentiableModel(Model): __metaclass__ = abc.ABCMeta # Needed in any class that has abstract methods. @abc.abstractmethod - def grad_log_likelihood(self, outcome, modelparams, expparams): - #TODO document + def score(self, outcome, modelparams, expparams): + r""" + Returns the score of this likelihood function, defined as: + + .. math:: + + q(d, \vec{x}; \vec{e}) = \vec{\nabla}_{\vec{x}} \Pr(d | \vec{x}; \vec{e}). + + Calls are represented as a four-index tensor + ``score[idx_modelparam, idx_outcome, idx_model, idx_experiment]``. + The left-most index may be suppressed for single-parameter models. + """ pass + + def fisher_information(self, modelparams, expparams): + """ + Returns the covariance of the score taken over possible outcomes, + known as the Fisher information. + + The result is represented as the four-index tensor + ``fisher[idx_modelparam_i, idx_modelparam_j, idx_model, idx_experiment]``, + which gives the Fisher information matrix for each model vector + and each experiment vector. + + .. note:: + + The default implementation of this method calls + :meth:`~DifferentiableModel.score()` for each possible outcome, + which can be quite slow. If possible, overriding this method can + give significant speed advantages. + """ + + # TODO: break into two cases, one for constant outcomes, one for + # variable. The latter will have to be a loop, which is much + # slower. + # Here, we sketch the first case. + # FIXME: completely untested! + if self.is_n_outcomes_constant: + outcomes = np.arange(self.n_outcomes(expparams)) + L = self.likelihood(outcomes, modelparams, expparams) + scores = self.score(outcomes, modelparams, expparams) + + assert len(scores.shape) in (3, 4) + + if len(scores.shape) == 3: + scores = scores[np.newaxis, :, :, :] + + # Note that E[score] = 0 by regularity assumptions, so we only + # need the expectation over the outer product. + return np.einsum("ome,iome,jome->ijme", + L, scores, scores + ) + else: + # Indexing will be a major pain here, so we need to start + # by making an empty array, so that index errors will be raised + # when (not if!) we make mistakes. + fisher = np.empty(( + self.n_modelparams, self.n_modelparams, + modelparams.shape[0], expparams.shape[0] + )) + + # Now we loop over experiments, since we cannot vectorize the + # expectation value over data. + for idx_experiment, experiment in enumerate(expparams): + experiment = experiment.reshape((1,)) + n_o = self.n_outcomes(experiment) + + outcomes = np.arange(n_o) + L = self.likelihood(outcomes, modelparams, experiment) + scores = self.score(outcomes, modelparams, experiment) + + fisher[:, :, :, idx_experiment] = np.einsum("ome,iome,jome->ijme", + L, scores, scores + ) + + return fisher + + diff --git a/src/qinfer/derived_models.py b/src/qinfer/derived_models.py index 54162d3..9a846a7 100644 --- a/src/qinfer/derived_models.py +++ b/src/qinfer/derived_models.py @@ -41,7 +41,7 @@ from scipy.stats import binom from qinfer.utils import binomial_pdf -from qinfer.abstract_model import Model +from qinfer.abstract_model import Model, DifferentiableModel from qinfer._lib import enum # <- TODO: replace with flufl.enum! from qinfer.ale import binom_est_error @@ -218,7 +218,7 @@ def is_n_outcomes_constant(self): @property def modelparam_names(self): - return self._model.modelparam_names + return self.decorated_model.modelparam_names ## METHODS ## @@ -263,7 +263,7 @@ def simulate_experiment(self, modelparams, expparams, repeat=1): expparams['x'] if self._expparams_scalar else expparams) dist = binom( - expparams['n_meas'].astype('int64'), # ← Really, NumPy? + expparams['n_meas'].astype('int'), # ← Really, NumPy? pr1[0, :, :] ) sample = ( @@ -277,6 +277,28 @@ def simulate_experiment(self, modelparams, expparams, repeat=1): ], axis=0) return os[0,0,0] if os.size == 1 else os +class DifferentiableBinomialModel(BinomialModel, DifferentiableModel): + """ + Extends :class:`BinomialModel` to take advantage of differentiable + two-outcome models. + """ + + def __init__(self, decorated_model): + if not isinstance(decorated_model, DifferentiableModel): + raise TypeError("Decorated model must also be differentiable.") + BinomialModel.__init__(self, decorated_model) + + def score(self, outcomes, modelparams, expparams): + raise NotImplementedError("Not yet implemented.") + + def fisher_information(self, modelparams, expparams): + # Since the FI simply adds, we can multiply the single-shot + # FI provided by the underlying model by the number of measurements + # that we perform. + two_outcome_fi = self.decorated_model.fisher_information( + modelparams, expparams + ) + return two_outcome_fi * expparams['n_meas'] ## TESTING CODE ############################################################### diff --git a/src/qinfer/distributions.py b/src/qinfer/distributions.py index ad1f1c3..ce6d9d9 100644 --- a/src/qinfer/distributions.py +++ b/src/qinfer/distributions.py @@ -209,20 +209,24 @@ def grad_log_pdf(self, x): class MultivariateNormalDistribution(Distribution): def __init__(self, mean, cov): - self.mean = mean + # Flatten the mean first, so we have a strong guarantee about its + # shape. + + self.mean = np.array(mean).flatten() self.cov = cov self.invcov = la.inv(cov) @property def n_rvs(self): - return self.mean.shape[1] + return self.mean.shape[0] - def sample(self): + def sample(self, n=1): - return np.dot(la.sqrtm(self.cov), np.random.randn(self.n_rvs)) + self.mean + return np.einsum("ij,nj->ni", la.sqrtm(self.cov), np.random.randn(n, self.n_rvs)) + self.mean def grad_log_pdf(self, x): - return -np.dot(self.invcov,(x - self.mean[0])) + + return -np.dot(self.invcov,(x - self.mean).transpose()).transpose() class SlantedNormalDistribution(Distribution): diff --git a/src/qinfer/smc.py b/src/qinfer/smc.py index 6abb27e..da25d7c 100644 --- a/src/qinfer/smc.py +++ b/src/qinfer/smc.py @@ -390,10 +390,10 @@ def n_rvs(self): def sample(self, n=1): # TODO: cache this. cumsum_weights = np.cumsum(self.particle_weights) - return self.particle_locations[cumsum_weights.searchsorted( + return self.particle_locations[np.minimum(cumsum_weights.searchsorted( np.random.random((n,)), side='right' - )] + ), len(cumsum_weights) - 1)] ## ESTIMATION METHODS ##################################################### @@ -946,57 +946,111 @@ class SMCUpdaterBCRB(SMCUpdater): functionality. Models considered by this class must be differentiable. + + In addition to the arguments taken by :class:`SMCUpdater`, this class + takes the following keyword-only arguments: + + :param bool adaptive: If `True`, the updater will track both the + non-adaptive and adaptive Bayes Information matrices. + :param initial_bim: If the regularity conditions are not met, then taking + the outer products of gradients over the prior will not give the correct + initial BIM. In such cases, ``initial_bim`` can be set to the correct + BIM corresponding to having done no experiments. """ def __init__(self, *args, **kwargs): - SMCUpdater.__init__(self, *args, **kwargs) + SMCUpdater.__init__(self, *args, **{ + key: kwargs[key] for key in kwargs + if key in [ + 'resampler_a', 'resampler', 'resample_thresh', 'model', + 'prior', 'n_particles' + ] + }) if not isinstance(self.model, DifferentiableModel): raise ValueError("Model must be differentiable.") - self.current_bim = np.sum(np.array([ - outer_product(self.prior.grad_log_pdf(particle)) - for particle in self.particle_locations - ]), axis=0) / self.n_particles + # TODO: fix distributions to make grad_log_pdf return the right + # shape, such that the indices are + # [idx_model, idx_param] → [idx_model, idx_param], + # so that prior.grad_log_pdf(modelparams[i, j])[i, k] + # returns the partial derivative with respect to the kth + # parameter evaluated at the model parameter vector + # modelparams[i, :]. + if 'initial_bim' not in kwargs or kwargs['initial_bim'] is None: + gradients = self.prior.grad_log_pdf(self.particle_locations) + self.current_bim = np.sum( + gradients[:, :, np.newaxis] * gradients[:, np.newaxis, :], + axis=0 + ) / self.n_particles + else: + self.current_bim = kwargs['initial_bim'] + + # Also track the adaptive BIM, if we've been asked to. + if "adaptive" in kwargs and kwargs["adaptive"]: + self._track_adaptive = True + # Both the prior- and posterior-averaged BIMs start + # from the prior. + self.adaptive_bim = self.current_bim.copy() + else: + self._track_adaptive = False + # TODO: since we are guaranteed differentiability, and since SMCUpdater is + # now a Distribution subclass representing posterior sampling, write + # a grad_log_pdf for the posterior distribution, perhaps? - def hypothetical_bim(self, expparams): - # E_{prior} E_{data | model, exp} [outer-product of grad-log-likelihood] - like_bim = np.zeros(self.current_bim.shape) - - # If there is only 1 model parameter, things can be more easily vectorized - if self.model.n_modelparams == 1: - outcomes = np.arange(self.model.n_outcomes(expparams)) - modelparams = self.prior.sample(self.n_particles) - weights = np.ones((self.n_particles,)) / self.n_particles - grad = self.model.grad_log_likelihood(outcomes, modelparams, expparams)**2 - L = self.model.likelihood(outcomes, modelparams, expparams) - - like_bim = np.sum(weights[:,np.newaxis] * np.sum(grad * L,0)) - + def _bim(self, modelparams, expparams, modelweights=None): + # TODO: document + # rough idea of this function is to take expectations of an + # FI over some distribution, here represented by modelparams. + + # NOTE: The signature of this function is a bit odd, but it allows + # us to represent in one function both prior samples of uniform + # weight and weighted samples from a posterior. + # Because it's a bit odd, we make it a private method and expose + # functionality via the prior_bayes_information and + # posterior_bayes_information methods. + + # About shapes: the FI we will be averaging over has four indices: + # FI[i, j, m, e], i and j being matrix indices, m being a model index + # and e being a model index. + # We will thus want to return an array of shape BI[i, j, e], reducing + # over the model index. + fi = self.model.fisher_information(modelparams, expparams) + + # We now either reweight and sum, or sum and divide, based on whether we + # have model weights to consider or not. + if modelweights is None: + # Assume uniform weights. + bim = np.sum(fi, axis=2) / modelparams.shape[0] else: - for idx_particle in xrange(self.n_particles): + bim = np.einsum("m,ijme->ije", modelweights, fi) + + return bim - modelparams = self.prior.sample() - - weight = 1 / self.n_particles - - for outcome in np.arange(self.model.n_outcomes(expparams))[...,np.newaxis]: - - grad = outer_product(self.model.grad_log_likelihood(outcome, modelparams, expparams))[0] - L = self.model.likelihood(outcome, modelparams, expparams) - - like_bim += weight * grad * L - - return self.current_bim + like_bim + def prior_bayes_information(self, expparams, n_samples=None): + if n_samples is None: + n_samples = self.particle_locations.shape[0] + return self._bim(self.prior.sample(n_samples), expparams) + def posterior_bayes_information(self, expparams): + return self._bim( + self.particle_locations, expparams, + modelweights=self.particle_weights + ) def update(self, outcome, expparams): # Before we update, we need to commit the new Bayesian information # matrix corresponding to the measurement we just made. - self.current_bim = self.hypothetical_bim(expparams) + self.current_bim += self.prior_bayes_information(expparams)[:, :, 0] + + # If we're tracking the information content accessible to adaptive + # algorithms, then we must use the current posterior as the prior + # for the next step, then add that accordingly. + if self._track_adaptive: + self.adaptive_bim += self.posterior_bayes_information(expparams)[:, :, 0] # We now can update as normal. SMCUpdater.update(self, outcome, expparams) diff --git a/src/qinfer/test_models.py b/src/qinfer/test_models.py index 4fec6dd..a3a0243 100644 --- a/src/qinfer/test_models.py +++ b/src/qinfer/test_models.py @@ -105,13 +105,15 @@ def likelihood(self, outcomes, modelparams, expparams): # Now we concatenate over outcomes. return Model.pr0_to_likelihood_array(outcomes, pr0) - def grad_log_likelihood(self, outcome, modelparams, expparams): + def score(self, outcomes, modelparams, expparams): #TODO: vectorize this + outcomes = outcomes[:, np.newaxis, np.newaxis] + arg = modelparams * expparams / 2 return ( - ( expparams / np.tan(arg)) ** (outcome) * - (-expparams * np.tan(arg)) ** (1-outcome) + ( expparams / np.tan(arg)) ** (outcomes) * + (-expparams * np.tan(arg)) ** (1-outcomes) ) class NoisyCoinModel(Model):