diff --git a/RunDEMC/de.py b/RunDEMC/de.py
index 744da8b..de2658a 100644
--- a/RunDEMC/de.py
+++ b/RunDEMC/de.py
@@ -16,6 +16,7 @@ def njit(func):
         return func
 
 import numpy as np
+from numpy.random import default_rng
 import sys
 import random
 import time
diff --git a/RunDEMC/demc.py b/RunDEMC/demc.py
index 08a04ad..9dafe01 100644
--- a/RunDEMC/demc.py
+++ b/RunDEMC/demc.py
@@ -10,6 +10,7 @@
 
 # global imports
 import numpy as np
+from numpy.random import default_rng
 import sys
 import random
 import time
@@ -106,10 +107,16 @@ def __init__(self, name, params, like_fun,
                  pop_recarray=True, pop_parallel=False,
                  n_jobs=-1, backend=None,
-                 use_dask=False):
+                 use_dask=False,
+                 rng=None):
         """
         DEMC
         """
+        self._rng = rng
+
+        if self._rng is None:
+            self._rng = default_rng()
+
         # set the vars
         self._name = name
         self._params = params  # can't be None
@@ -288,7 +295,7 @@ def _get_split_ind(self):
 
         if self._rand_split:
             # randomize the indices
-            all_ind = np.random.permutation(num_particles)
+            all_ind = self._rng.permutation(num_particles)
         else:
             all_ind = np.arange(num_particles)
@@ -300,7 +307,7 @@ def _migrate(self):
         # pick which items will migrate
-        num_to_migrate = np.random.random_integers(2, self._num_chains)
+        num_to_migrate = self._rng.integers(2, self._num_chains, endpoint=True)
         to_migrate = random.sample(
             list(range(self._num_chains)), num_to_migrate)
@@ -328,7 +335,7 @@
             mh_prob = np.exp(log_diff)
             if np.isnan(mh_prob):
                 mh_prob = 0.0
-            keep = (mh_prob - np.random.rand()) > 0.0
+            keep = (mh_prob - self._rng.random()) > 0.0
             if keep:
                 keepers.append({'ind': j,
                                 'particle': self._particles[-1][i],
@@ -362,7 +369,7 @@ def sample(self, num_iter, burnin=False, migration_prob=0.0):
         else:
             progress = range(num_iter)
         for i in progress:
-            if np.random.rand() < migration_prob:
+            if self._rng.random() < migration_prob:
                 # migrate, which is deterministic and done in place
                 # if self._verbose:
                 #     sys.stdout.write('x ')
@@ -401,7 +408,7 @@
             evo_args = self._get_evo_args(burnin=burnin)
 
             # get new state
-            evo_res = _evolve(**evo_args)
+            evo_res = _evolve(self._rng, **evo_args)
 
             # save the state
             self._save_evolution(**evo_res)
@@ -743,7 +750,7 @@ def _calc_log_prior(*props, priors, cur_split=None):
 
     return log_priors
 
 
-def _evolve(prop_gen=None, parts_ind=None, split_ind=None,
+def _evolve(rng, prop_gen=None, parts_ind=None, split_ind=None,
             particles=None, param_names=None, transforms=None,
             priors=None, fixed=None, log_likes=None, weights=None,
             use_priors=True, recalc_likes=False,
@@ -812,7 +819,7 @@ def _evolve(prop_gen=None, parts_ind=None, split_ind=None,
         prev_weights = prev_log_likes
 
     # calc acceptance in log space
-    keep = np.log(np.random.rand(len(log_diff))) < log_diff
+    keep = np.log(rng.random(len(log_diff))) < log_diff
 
     # handle much greater than one
     #log_diff[log_diff > 0.0] = 0.0
@@ -820,7 +827,7 @@
     # now exp so we can get the other probs
     # mh_prob = np.exp(log_diff)
     # mh_prob[np.isnan(mh_prob)] = 0.0
-    # keep = (mh_prob - np.random.rand(len(mh_prob))) > 0.0
+    # keep = (mh_prob - rng.random(len(mh_prob))) > 0.0
 
     # modify the relevant particles
     # use mask the mask approach
@@ -891,7 +898,7 @@ def rvs(self, size, cur_split):
             chains = np.arange(self._num_chains)
         else:
             # pick randomly
-            chains = np.random.randint(0, self._num_chains, size[0])
+            chains = self._rng.integers(0, self._num_chains, size[0])
 
         # generate the random vars using the likelihood func
         # pop = self._particles[-1][chains]
@@ -910,7 +917,8 @@ def __init__(self, name, dist, params,
                  proposal_gen=None,
                  burnin_proposal_gen=None,
                  use_priors=True,
-                 verbose=False):
+                 verbose=False,
+                 rng=None):
 
         # handle the dist
         self._dist = dist
@@ -925,7 +933,8 @@
                          initial_zeros_ok=True,
                          use_priors=use_priors,
                          verbose=verbose,
-                         pop_recarray=False)
+                         pop_recarray=False,
+                         rng=rng)
 
         # We need to recalc likes of prev iteration in case of hyperprior
         self._recalc_likes = True
@@ -967,7 +976,7 @@ def pdf(self, vals, cur_split):
             cur_split = np.arange(self._num_chains)
         else:
             # pick randomly
-            cur_split = np.random.randint(0, self._num_chains, len(vals))
+            cur_split = self._rng.integers(0, self._num_chains, len(vals))
 
         # generate the pdf using the likelihood func
         pop = _apply_param_transform(self._particles[-1][cur_split],
@@ -997,7 +1006,7 @@ def logpdf(self, vals, cur_split):
             cur_split = np.arange(self._num_chains)
         else:
             # pick randomly
-            cur_split = np.random.randint(0, self._num_chains, len(vals))
+            cur_split = self._rng.integers(0, self._num_chains, len(vals))
 
         # generate the pdf using the likelihood func
         pop = _apply_param_transform(self._particles[-1][cur_split],
@@ -1027,7 +1036,7 @@ def rvs(self, size, cur_split):
             chains = np.arange(num_chains)[cur_split]
         else:
             # pick randomly
-            chains = np.random.randint(0, num_chains, size[0])
+            chains = self._rng.integers(0, num_chains, size[0])
 
         # generate the random vars using the likelihood func
         # pop = self._particles[-1][chains]
diff --git a/RunDEMC/dists.py b/RunDEMC/dists.py
index be6c8fe..0dbe5c0 100644
--- a/RunDEMC/dists.py
+++ b/RunDEMC/dists.py
@@ -7,9 +7,12 @@
 #
 ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
 
-import numpy as np
+import scipy.stats.distributions
 import scipy.stats.distributions as dists
-from scipy.special import gammaln
+from scipy.stats._distn_infrastructure import argsreduce
+from scipy.special import gammaln, betaln, xlogy, xlog1py, log1p
+from numpy.random import default_rng
+import numpy as np
 
 
 def logit(x):
@@ -42,12 +45,33 @@
     return np.exp(result)
 
 
-def normal(mean=0.0, std=1.0):
-    return dists.norm(loc=mean, scale=std)
+_log_half = np.log(.5)
+_norm_pdf_C = np.sqrt(2*np.pi)
+_norm_pdf_logC = np.log(_norm_pdf_C)
+class normal:
+    def __init__(self, mean=0.0, std=1.0):
+        assert np.all(std > 0)
+
+        self.mean = mean
+        self.std = std
+        self.rng = default_rng()
+
+        self._second_term = -_norm_pdf_logC - np.log(std)
+
+    def logpdf(self, x):
+        y = (x - self.mean) / self.std
+        return -y**2 / 2.0 + self._second_term
+
+    def pdf(self, x):
+        return np.exp(self.logpdf(x))
+
+    def rvs(self, size=1, random_state=None):
+        if random_state is None:
+            random_state = self.rng
+
+        return random_state.normal(self.mean, self.std, size)
 
-import scipy.stats.distributions
-from scipy.stats._distn_infrastructure import argsreduce
 _norm_cdf = scipy.stats.distributions._continuous_distns._norm_cdf
 _norm_sf = scipy.stats.distributions._continuous_distns._norm_sf
 _norm_isf = scipy.stats.distributions._continuous_distns._norm_isf
@@ -127,7 +151,7 @@ def _pdf_fixed(self, x, *args, **kwds):
             ccond = cond.copy()
             ccond.shape = goodargs[0].shape
             output[cond] = (self._pdf(*goodargs) / scale)[ccond]
-            #place(output, cond, self._pdf(*goodargs) / scale)
+            # place(output, cond, self._pdf(*goodargs) / scale)
         if output.ndim == 0:
             return output[()]
         return output
@@ -145,50 +169,229 @@ def trunc_normal(mean=0.0, std=1.0, lower=0.0, upper=1.0):
     return my_tn(a, b, loc=mean, scale=std)
 
 
-def uniform(lower=0.0, upper=1.0):
-    return dists.uniform(loc=lower, scale=upper - lower)
+class uniform:
+    def __init__(self, lower=0.0, upper=1.0):
+        assert np.all(lower < upper)
+
+        self.lower = lower
+        self.upper = upper
+        self.rng = default_rng()
+
+        self._size = upper - lower
+        self._inv_size = 1 / self._size
+        self._log_size = np.log(self._size)
+
+    def logpdf(self, x):
+        return np.where((x >= self.lower) & (x <= self.upper), -self._log_size, np.NINF)
+
+    def pdf(self, x):
+        return np.where((x >= self.lower) & (x <= self.upper), self._inv_size, 0)
+
+    def rvs(self, size=1, random_state=None):
+        if random_state is None:
+            random_state = self.rng
+        return random_state.uniform(self.lower, self.upper, size)
 
 
-def beta(alpha=.5, beta=.5):
-    return dists.beta(alpha, beta)
+class beta:
+    def __init__(self, alpha=.5, beta=.5):
+        assert np.all(alpha > 0)
+        assert np.all(beta > 0)
 
+        self.alpha = alpha
+        self.beta = beta
+        self.rng = default_rng()
 
-def gamma(alpha=1.0, beta=1.0):
+        self._neg_betaln = -betaln(self.alpha, self.beta)
+
+    def logpdf(self, x):
+        return np.where((x >= 0) & (x <= 1), xlog1py(self.beta - 1.0, -x) + xlogy(self.alpha - 1.0, x) + self._neg_betaln, np.NINF)
+
+    def pdf(self, x):
+        return np.where((x >= 0) & (x <= 1), np.exp(self.logpdf(x)), 0)
+
+    def rvs(self, size=1, random_state=None):
+        if random_state is None:
+            random_state = self.rng
+
+        return random_state.beta(self.alpha, self.beta, size)
+
+
+class gamma:
     """
     alpha = k
     beta = 1/theta
     """
-    return dists.gamma(alpha, scale=1. / beta)
+    def __init__(self, alpha=1.0, beta=1.0):
+        assert np.all(alpha > 0)
+        assert np.all(beta > 0)
+
+        self.alpha = alpha
+        self.beta = beta
+        self.rng = default_rng()
 
-def invgamma(alpha=1.0, beta=1.0):
-    """
-    """
-    return dists.invgamma(alpha, scale=beta)
+        self._second_term = -gammaln(self.alpha) + np.log(beta)
+        self._theta = 1 / beta
 
+    def logpdf(self, x):
+        y = x*self.beta
+        return np.where(x >= 0, xlogy(self.alpha-1.0, y) - y + self._second_term, np.NINF)
 
-def exp(lam=1.0):
-    return dists.expon(scale=1. / lam)
+    def pdf(self, x):
+        return np.where(x >= 0, np.exp(self.logpdf(x)), 0)
 
+    def rvs(self, size=1, random_state=None):
+        if random_state is None:
+            random_state = self.rng
+        return random_state.gamma(self.alpha, self._theta, size)
 
-def poisson(lam=1.0):
-    return dists.poisson(mu=lam)
+class invgamma:
+    """
+    alpha = k
+    beta = 1/theta
+    """
+
+    def __init__(self, alpha=1.0, beta=1.0):
+        assert np.all(alpha > 0)
+        assert np.all(beta > 0)
+
+        self.alpha = alpha
+        self.beta = beta
+        self.rng = default_rng()
+
+        self._last_term = -gammaln(self.alpha) - np.log(self.beta)
+
+    def logpdf(self, x):
+        y = x/self.beta
+        with np.errstate(divide='warn', invalid='warn'):
+            return np.where(x > 0, -(self.alpha+1) * np.log(y) -
+                            np.divide(1.0, y, out=np.zeros_like(y, dtype=np.float64), where=(y!=0.)) +
+                            self._last_term, np.NINF)
+
+    def pdf(self, x):
+        return np.where(x > 0, np.exp(self.logpdf(x)), 0)
+
+    def rvs(self, size=1, random_state=None):
+        if random_state is None:
+            random_state = self.rng
+        return 1. / random_state.gamma(self.alpha, 1./self.beta, size)
+
+
+class exp:
+    def __init__(self, lam=1.0):
+        assert np.all(lam > 0)
+
+        self.lam = lam
+        self.rng = default_rng()
+        self._log_lam = np.log(self.lam)
+
+    def logpdf(self, x):
+        return np.where(x >= 0, self._log_lam - x*self.lam, np.NINF)
+
+    def pdf(self, x):
+        return np.where(x >= 0, np.exp(self.logpdf(x)), 0)
+
+    def rvs(self, size=1, random_state=None):
+        if random_state is None:
+            random_state = self.rng
+        return random_state.exponential(1./self.lam, size)
+
+class poisson:
+    def __init__(self, lam=1.0):
+        assert np.all(lam > 0)
+
+        self.lam = lam
+        self.rng = default_rng()
+
+    def logpmf(self, x):
+        return np.where((x >= 0) & (x == np.round(x)), xlogy(x, self.lam) - gammaln(x + 1) - self.lam, np.NINF)
+
+    def pmf(self, x):
+        return np.where((x >= 0) & (x == np.round(x)), np.exp(self.logpmf(x)), 0)
+
+    def rvs(self, size=1, random_state=None):
+        if random_state is None:
+            random_state = self.rng
+
+        return random_state.poisson(self.lam, size)
+
+class laplace:
+    def __init__(self, loc=0.0, diversity=1.0):
+        assert np.all(diversity > 0)
+
+        self.loc = loc
+        self.diversity = diversity
+        self.rng = default_rng()
+
+        self._log_diversity = np.log(diversity)
+
+    def logpdf(self, x):
+        y = (x - self.loc) / self.diversity
+        return _log_half - np.abs(y) - self._log_diversity
+
+    def pdf(self, x):
+        return np.exp(self.logpdf(x))
+
+    def rvs(self, size=1, random_state=None):
+        if random_state is None:
+            random_state = self.rng
+
+        return random_state.laplace(self.loc, self.diversity, size=size)
+
+class students_t:
+    def __init__(self, mean=0, std=1.0, df=1.0):
+        assert np.all(std > 0)
+        assert np.all(df > 0)
+
+        self.mean = mean
+        self.std = std
+        self.df = df
+        self.rng = default_rng()
+
+        self._half_df_plus_one = (self.df+1)/2
+        self._first_term = gammaln(self._half_df_plus_one) - gammaln(self.df/2) - 0.5*np.log(self.df*np.pi) - np.log(self.std)
+
+    def logpdf(self, x):
+        y = (x - self.mean) / self.std
+        return self._first_term - self._half_df_plus_one*np.log(1+(y**2)/self.df)
+
+    def pdf(self, x):
+        return np.exp(self.logpdf(x))
+
+    def rvs(self, size=1, random_state=None):
+        if random_state is None:
+            random_state = self.rng
+
+        return random_state.standard_t(self.df, size)*self.std + self.mean
+
+def noncentral_t(mean=0, std=1.0, df=1.0, nc=0.0):
+    return dists.nct(df=df, nc=nc, loc=mean, scale=std)
 
-def laplace(loc=0.0, diversity=1.0):
-    return dists.laplace(loc=loc, scale=diversity)
+class halfcauchy:
+    def __init__(self, scale=1.0, loc=0.0):
+        assert np.all(scale > 0)
 
-def students_t(mean=0, std=1.0, df=1.0):
-    return dists.t(df=df, loc=mean, scale=std)
+        self.scale = scale
+        self.loc = loc
+        self.rng = default_rng()
+        self._first_term = np.log(2.0/np.pi) - np.log(self.scale)
 
-def noncentral_t(mean=0, std=1.0, df=1.0, nc=0.0):
-    return dists.nct(df=df, nc=nc, loc=mean, scale=std)
+    def logpdf(self, x):
+        y = (x - self.loc) / self.scale
+        return np.where(x >= self.loc, self._first_term - log1p(y**2), np.NINF)
 
+    def pdf(self, x):
+        return np.where(x >= self.loc, np.exp(self.logpdf(x)), 0)
 
-def halfcauchy(scale=1.0, loc=0.0):
-    return dists.halfcauchy(loc=loc, scale=scale)
+    def rvs(self, size=1, random_state=None):
+        if random_state is None:
+            random_state = self.rng
+
+        return np.abs(random_state.standard_cauchy(size))*self.scale + self.loc
 
 
 def epa_kernel(x, delta):
diff --git a/RunDEMC/gibbs.py b/RunDEMC/gibbs.py
index 391bbcf..c13cbbb 100644
--- a/RunDEMC/gibbs.py
+++ b/RunDEMC/gibbs.py
@@ -8,6 +8,8 @@
 ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
 
 import numpy as np
+from numpy.random import default_rng
+
 import time
 
 from .dists import normal, invgamma
@@ -60,12 +62,17 @@ class NormalHyperPrior(object):
         Acceptance rate as an array.
         """)
 
-    def __init__(self, name, mu, sigma, alpha, beta):
+    def __init__(self, name, mu, sigma, alpha, beta, rng=None):
         self._name = name
        self._mu = mu
         self._sigma = sigma
         self._alpha = alpha
         self._beta = beta
+
+        self._rng = rng
+
+        if self._rng is None:
+            self._rng = default_rng()
 
         self._params = [Param(name='mean',
                               prior=normal(self._mu, self._sigma)),
@@ -172,7 +179,7 @@ def pdf(self, vals, cur_split):
             cur_split = np.arange(self._num_chains)
         else:
             # pick randomly
-            cur_split = np.random.randint(0, self._num_chains, len(vals))
+            cur_split = self._rng.integers(0, self._num_chains, len(vals))
 
         # generate the pdf using the likelihood func
         pop = _apply_param_transform(self._particles[-1][cur_split],
@@ -197,7 +204,7 @@ def logpdf(self, vals, cur_split):
             cur_split = np.arange(self._num_chains)
         else:
             # pick randomly
-            cur_split = np.random.randint(0, self._num_chains, len(vals))
+            cur_split = self._rng.integers(0, self._num_chains, len(vals))
 
         # generate the pdf using the likelihood func
         pop = _apply_param_transform(self._particles[-1][cur_split],
@@ -223,7 +230,7 @@ def rvs(self, size, cur_split):
             chains = np.arange(num_chains)[cur_split]
         else:
             # pick randomly
-            chains = np.random.randint(0, num_chains, size[0])
+            chains = self._rng.integers(0, num_chains, size[0])
 
         # generate the random vars using the likelihood func
         # pop = self._particles[-1][chains]
diff --git a/RunDEMC/hierarchy.py b/RunDEMC/hierarchy.py
index c0dbbd9..e762274 100644
--- a/RunDEMC/hierarchy.py
+++ b/RunDEMC/hierarchy.py
@@ -10,6 +10,7 @@
 
 import sys
 import numpy as np
+from numpy.random import default_rng
 from fastprogress.fastprogress import progress_bar
 
 from .demc import Model, HyperPrior, FixedParams
@@ -63,7 +64,7 @@ class Hierarchy(object):
     def __init__(self, models, num_chains=None,
                  parallel=None, use_dask=False,
                  delay_hyper_burnin=False,
-                 separate_fixed=False):
+                 separate_fixed=False, rng=None):
         """Figures out the HyperPriors and FixedParams from list of submodels.
         """
         self._models = _flatten(models)
@@ -74,6 +75,11 @@
         self._use_dask = use_dask
         self._delay_hyper_burnin = delay_hyper_burnin
         self._separate_fixed = separate_fixed
+
+        self._rng = rng
+
+        if self._rng is None:
+            self._rng = default_rng()
 
     def save(self, filename, **kwargs):
         # loop over models adding to a dict of dicts
@@ -399,7 +405,7 @@ def sample(self, num_iter=1, burnin=False, migration_prob=0.0,
         jobs = []
         for m in self._models:
             # check if migrate
-            if np.random.rand() < migration_prob:
+            if self._rng.random() < migration_prob:
                 # migrate, which is deterministic and done in place
                 m._migrate()
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..cb5ba73
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,12 @@
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# ex: set sts=4 ts=4 sw=4 et:
+### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
+#
+# See the COPYING file distributed along with the RunDEMC package for the
+# copyright and license terms.
+#
+### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
+
+"""
+RunDEMC - Approximate Bayesian Computation with Differential Evolution
+"""
diff --git a/tests/test_dists.py b/tests/test_dists.py
new file mode 100644
index 0000000..50046df
--- /dev/null
+++ b/tests/test_dists.py
@@ -0,0 +1,143 @@
+# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
+# ex: set sts=4 ts=4 sw=4 et:
+### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
+#
+# See the COPYING file distributed along with the RunDEMC package for the
+# copyright and license terms.
+#
+### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
+
+import unittest
+from numpy.testing import *
+
+from RunDEMC import dists
+import scipy.stats as stats
+import numpy as np
+from numpy.random import default_rng
+
+
+class TestDists(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        reals = np.linspace(-10, 10, 100)
+        naturals = np.arange(0, 10)
+
+        cls.x_vals = np.concatenate((reals, naturals))
+        cls.seed = default_rng().integers(low=0, high=10000, size=1)
+        cls.rv_size = 10
+
+    def dist_assertions(self, scipy, demc, pdf=True, test_rvs=True):
+        if pdf:
+            assert_allclose(scipy.pdf(self.x_vals), demc.pdf(self.x_vals))
+            assert_allclose(scipy.logpdf(self.x_vals), demc.logpdf(self.x_vals))
+        else:
+            assert_allclose(scipy.pmf(self.x_vals), demc.pmf(self.x_vals))
+            assert_allclose(scipy.logpmf(self.x_vals), demc.logpmf(self.x_vals))
+
+        if test_rvs:
+            assert_allclose(scipy.rvs(self.rv_size, default_rng(self.seed)),
+                            demc.rvs(self.rv_size, default_rng(self.seed)))
+
+    def test_normal(self):
+        means = [0, 1, 2]
+        stds = [1, 3, 5]
+
+        for mean, std in zip(means, stds):
+            scipy = stats.norm(loc=mean, scale=std)
+            demc = dists.normal(mean, std)
+            self.dist_assertions(scipy, demc)
+
+    def test_uniform(self):
+        lowers = [0, 1, 2]
+        uppers = [1, 3, 5]
+
+        for lower, upper in zip(lowers, uppers):
+            scipy = stats.uniform(loc=lower, scale=upper-lower)
+            demc = dists.uniform(lower, upper)
+            self.dist_assertions(scipy, demc)
+
+    def test_beta(self):
+        alphas = [1, 2, 3]
+        betas = [3, 2, 1]
+
+        for alpha, beta in zip(alphas, betas):
+            scipy = stats.beta(alpha, beta)
+            demc = dists.beta(alpha, beta)
+            self.dist_assertions(scipy, demc)
+
+    def test_gamma(self):
+        alphas = [1, 2, 3]
+        betas = [3, 2, 1]
+
+        for alpha, beta in zip(alphas, betas):
+            scipy = stats.gamma(alpha, scale=1. / beta)
+            demc = dists.gamma(alpha, beta)
+            self.dist_assertions(scipy, demc)
+
+    def test_invgamma(self):
+        alphas = [1, 2, 3]
+        betas = [3, 2, 1]
+
+        for alpha, beta in zip(alphas, betas):
+            scipy = stats.invgamma(alpha, scale=beta)
+            demc = dists.invgamma(alpha, beta)
+            self.dist_assertions(scipy, demc, test_rvs=False)
+
+    def test_exp(self):
+        lams = [1, 2, 3]
+
+        for lam in lams:
+            scipy = stats.expon(scale=1. / lam)
+            demc = dists.exp(lam)
+            self.dist_assertions(scipy, demc)
+
+    def test_poisson(self):
+        lams = [1, 2, 3]
+
+        for lam in lams:
+            scipy = stats.poisson(mu=lam)
+            demc = dists.poisson(lam)
+            self.dist_assertions(scipy, demc, pdf=False)
+
+    def test_laplace(self):
+        locs = [0, 1, 2]
+        diversities = [1, 2, 3]
+
+        for loc, diversity in zip(locs, diversities):
+            scipy = stats.laplace(loc=loc, scale=diversity)
+            demc = dists.laplace(loc, diversity)
+            self.dist_assertions(scipy, demc)
+
+    def test_students_t(self):
+        dfs = [1, 2, 3]
+        means = [0, 1, 2]
+        stds = [1, 2, 3]
+
+        for df, mean, std in zip(dfs, means, stds):
+            scipy = stats.t(df=df, loc=mean, scale=std)
+            demc = dists.students_t(mean, std, df)
+            self.dist_assertions(scipy, demc)
+
+    @unittest.skip("not implemented")
+    def test_noncentral_t(self):
+        dfs = [1, 2, 3]
+        ncs = [1, 2, 3]
+        means = [0, 1, 2]
+        stds = [1, 2, 3]
+
+        for df, nc, mean, std in zip(dfs, ncs, means, stds):
+            scipy = stats.nct(df=df, nc=nc, loc=mean, scale=std)
+            demc = dists.noncentral_t(mean, std, df, nc)
+            self.dist_assertions(scipy, demc)
+
+    def test_halfcauchy(self):
+        scales = [1, 2, 3]
+        locs = [0, 1, 2]
+
+        for scale, loc in zip(scales, locs):
+            scipy = stats.halfcauchy(loc=loc, scale=scale)
+            demc = dists.halfcauchy(scale, loc)
+            self.dist_assertions(scipy, demc, test_rvs=False)
+
+if __name__ == '__main__':
+    unittest.main()
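
For a quick smoke test outside the unittest harness, the new distribution classes can be compared against their scipy counterparts directly, since they deliberately mirror the frozen-distribution interface (pdf/logpdf plus rvs(size, random_state)). A minimal sketch, mirroring tests/test_dists.py and assuming RunDEMC and scipy are installed:

import numpy as np
import scipy.stats as stats
from numpy.random import default_rng
from RunDEMC import dists

x = np.linspace(-5, 5, 11)

# log-densities match the frozen scipy distribution
d = dists.normal(mean=1.0, std=2.0)
np.testing.assert_allclose(d.logpdf(x), stats.norm(loc=1.0, scale=2.0).logpdf(x))

# passing an explicitly seeded Generator makes draws reproducible
np.testing.assert_allclose(d.rvs(5, default_rng(42)), d.rvs(5, default_rng(42)))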
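
The same seeding discipline extends to sampling: the new rng keyword on Model (and on Hierarchy and NormalHyperPrior) threads a single Generator through the migration, evolution, and acceptance draws. Below is a minimal sketch under stated assumptions: it presumes Model and Param are importable from the package top level (as dists is in the tests), and like_fun is a hypothetical stand-in that treats pop as a recarray keyed by parameter name; the real contract is whatever RunDEMC's existing like_fun API specifies.

import numpy as np
from numpy.random import default_rng
from RunDEMC import Model, Param, dists

def like_fun(pop, *args):
    # hypothetical toy log-likelihood: a standard normal over one parameter
    mu = np.asarray(pop['mu'])
    return -0.5 * mu**2 - 0.5 * np.log(2 * np.pi)

rng = default_rng(12345)  # one shared, seeded Generator for the whole run
m = Model('toy',
          params=[Param(name='mu', prior=dists.normal(0.0, 1.0))],
          like_fun=like_fun,
          rng=rng)
m.sample(50, burnin=True)  # migration and acceptance draws now come from rng

Re-running with the same seed reproduces the chains, which is the point of routing every np.random call through self._rng instead of the global legacy RandomState.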