From 1522b954fc249f43e210e8d3f6aca7ab5f59861c Mon Sep 17 00:00:00 2001 From: Rowan Schaefer Date: Sun, 4 Sep 2022 01:23:06 -0400 Subject: [PATCH 01/12] fixed docstrings and a reference --- pymc/smc/kernels.py | 34 ++++++++++++++-------------- pymc/smc/sampling.py | 53 ++++++++++++++++++++++---------------------- 2 files changed, 43 insertions(+), 44 deletions(-) diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index 04131a0dd..b9dabf566 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -53,7 +53,7 @@ class SMC_KERNEL(ABC): to sampling from the prior distribution. This method is only called if `start` is not specified. - _initialize_kernel: default + _initialize_kernel : default Creates initial population of particles in the variable `self.tempered_posterior` and populates the `self.var_info` dictionary with information about model variables shape and size as @@ -70,13 +70,13 @@ class SMC_KERNEL(ABC): This method should not be modified. - setup_kernel: optional + setup_kernel : optional May include any logic that should be performed before sampling starts. During each sampling stage the following methods are called in order: - update_beta_and_weights: default + update_beta_and_weights : default The inverse temperature self.beta is updated based on the self.likelihood_logp and `threshold` parameter @@ -88,7 +88,7 @@ class SMC_KERNEL(ABC): Finally the model log_marginal_likelihood of the tempered posterior is updated from these weights - resample: default + resample : default The particles in self.posterior are sampled with replacement based on self.weights, and the used resampling indexes are saved in `self.resampling_indexes`. @@ -97,27 +97,27 @@ class SMC_KERNEL(ABC): to the order of the resampled particles. self.tempered_posterior_logp is computed from these and the current self.beta - tune: optional + tune : optional May include logic that should be performed before every mutation step - mutate: REQUIRED + mutate : REQUIRED Mutate particles in self.tempered_posterior This method is further responsible to update the self.prior_logp, self.likelihod_logp and self.tempered_posterior_logp, corresponding to each mutated particle - sample_stats: default + sample_stats : default Returns important sampling_stats at the end of each stage in a dictionary format. This will be saved in the final InferenceData objcet under `sample_stats`. Finally, at the end of sampling the following methods are called: - _posterior_to_trace: default + _posterior_to_trace : default Convert final population of particles to a posterior trace object. This method should not be modified. - sample_settings: default: + sample_settings : default: Returns important sample_settings at the end of sampling in a dictionary format. This will be saved in the final InferenceData objcet under `sample_stats`. @@ -135,16 +135,16 @@ def __init__( Parameters ---------- - draws: int - The number of samples to draw from the posterior (i.e. last stage). And also the number of + draws : int, default = 2000 + The number of samples to draw from the posterior (i.e. last stage). Also the number of independent chains. Defaults to 2000. - start: dict, or array of dict + start : dict, or array of dict, default = None Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. - model: Model (optional if in ``with`` context)). - random_seed: int + model : Model (optional if in ``with`` context). 
+ random_seed : {None, int, array_like[ints]} Value used to initialize the random number generator. - threshold: float + threshold : float, default = 0.5 Determines the change of beta from stage to stage, i.e.indirectly the number of stages, the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. It should be between 0 and 1. @@ -353,7 +353,7 @@ def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ Parameters ---------- - correlation_threshold: float + correlation_threshold : float, default = 0.01 The lower the value the higher the number of IMH steps computed automatically. Defaults to 0.01. It should be between 0 and 1. """ @@ -455,7 +455,7 @@ def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ Parameters ---------- - correlation_threshold: float + correlation_threshold : float, default = 0.01 The lower the value the higher the number of MH steps computed automatically. Defaults to 0.01. It should be between 0 and 1. """ diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index 19f783610..4f0f0dd33 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -56,52 +56,51 @@ def sample_smc( Parameters ---------- - draws: int + draws : int, default = 2000 The number of samples to draw from the posterior (i.e. last stage). And also the number of independent chains. Defaults to 2000. - kernel: SMC Kernel used. Defaults to pm.smc.IMH (Independent Metropolis Hastings) - start: dict, or array of dict + kernel : class, default = pm.smc.smc.IMH + SMC Kernel used. Defaults to pm.smc.IMH (Independent Metropolis Hastings) + start : dict, or array of dict, default = None Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. - model: Model (optional if in ``with`` context)). - random_seed : int, array-like of int, RandomState or Generator, optional - Random seed(s) used by the sampling steps. If a list, tuple or array of ints - is passed, each entry will be used to seed each chain. A ValueError will be - raised if the length does not match the number of chains. - chains : int + model : Model (optional if in ``with`` context)). + random_seed : {None, int, array_like[ints]} + Value used to initialize the random number generator. + chains : int, default = None The number of chains to sample. Running independent chains is important for some convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever is larger. - cores : int + cores : int, default = None The number of chains to run in parallel. If ``None``, set to the number of CPUs in the system. - compute_convergence_checks : bool + compute_convergence_checks : bool, default = True Whether to compute sampler statistics like ``R hat`` and ``effective_n``. Defaults to ``True``. - return_inferencedata : bool, default=True + return_inferencedata : bool, default = True Whether to return the trace as an :class:`arviz:arviz.InferenceData` (True) object or a `MultiTrace` (False) Defaults to ``True``. idata_kwargs : dict, optional Keyword arguments for :func:`pymc.to_inference_data` - progressbar : bool, optional default=True + progressbar : bool, optional, default = True Whether or not to display a progress bar in the command line. - **kernel_kwargs: keyword arguments passed to the SMC kernel. + **kernel_kwargs : keyword arguments passed to the SMC kernel. 
The default IMH kernel takes the following keywords: - threshold: float + threshold : float, default = 0.5 Determines the change of beta from stage to stage, i.e. indirectly the number of stages, the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. It should be between 0 and 1. - correlation_threshold: float + correlation_threshold : float, default = 0.01 The lower the value the higher the number of MCMC steps computed automatically. Defaults to 0.01. It should be between 0 and 1. - Keyword arguments for other kernels should be checked in the respective docstrings + Keyword arguments for other kernels should be checked in the respective docstrings. Notes ----- SMC works by moving through successive stages. At each stage the inverse temperature :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0 - we have the prior distribution and when :math:`\beta` =1 we have the posterior distribution. - So in more general terms we are always computing samples from a tempered posterior that we can + we have the prior distribution and when :math:`\beta` = 1 we have the posterior distribution. + In general terms, we are always computing samples from a tempered posterior that we can write as: .. math:: @@ -118,7 +117,7 @@ def sample_smc( 4. Compute a set of N importance weights W. The weights are computed as the ratio of the likelihoods of a sample at stage i+1 and stage i. 5. Obtain :math:`S_{w}` by re-sampling according to W. - 6. Use W to compute the mean and covariance for the proposal distribution, a MVNormal. + 6. Use W to compute the mean and covariance for the proposal distribution, a MvNormal. 7. Run N independent MCMC chains, starting each one from a different sample in :math:`S_{w}`. For the IMH kernel, the mean of the proposal distribution is the mean of the previous posterior stage and not the current point in parameter space. @@ -130,15 +129,15 @@ def sample_smc( References ---------- - .. [Minson2013] Minson, S. E. and Simons, M. and Beck, J. L., (2013), - Bayesian inversion for finite fault earthquake source models I- Theory and algorithm. - Geophysical Journal International, 2013, 194(3), pp.1701-1726, + .. [Minson2013] Minson, S. E., Simons, M., and Beck, J. L. (2013). + "Bayesian inversion for finite fault earthquake source models I- Theory and algorithm." + Geophysical Journal International, 2013, 194(3), pp.1701-1726. `link `__ - .. [Ching2007] Ching, J. and Chen, Y. (2007). - Transitional Markov Chain Monte Carlo Method for Bayesian Model Updating, Model Class - Selection, and Model Averaging. J. Eng. Mech., 10.1061/(ASCE)0733-9399(2007)133:7(816), - 816-832. `link `__ """ From ad5d3994d0a0992d41d470a1d54ce27ede7d7eb4 Mon Sep 17 00:00:00 2001 From: Rowan Schaefer Date: Sun, 4 Sep 2022 13:28:47 -0400 Subject: [PATCH 02/12] trailing whitespace --- pymc/smc/kernels.py | 85 +++++++++++++++++++++++--------------------- pymc/smc/sampling.py | 44 ++++++++++++----------- 2 files changed, 68 insertions(+), 61 deletions(-) diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index b9dabf566..444419747 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -41,9 +41,9 @@ class SMC_KERNEL(ABC): - """Base class for the Sequential Monte Carlo kernels + """Base class for the Sequential Monte Carlo kernels. - To create a new SMC kernel you should subclass from this. + To create a new kernel you should subclass from this. 
Before sampling, the following methods are called once in order: @@ -57,16 +57,16 @@ class SMC_KERNEL(ABC): Creates initial population of particles in the variable `self.tempered_posterior` and populates the `self.var_info` dictionary with information about model variables shape and size as - {var.name : (var.shape, var.size) + {var.name : (var.shape, var.size)}. - The functions self.prior_logp_func and self.likelihood_logp_func are + The functions `self.prior_logp_func` and `self.likelihood_logp_func` are created in this step. These expect a 1D numpy array with the summed sizes of each raveled model variable (in the order specified in - model.inial_point). + :meth:`pymc.Model.initial_point`). Finally, this method computes the log prior and log likelihood for - the initial particles, and saves them in self.prior_logp and - self.likelihood_logp. + the initial particles, and saves them in `self.prior_logp` and + `self.likelihood_logp`. This method should not be modified. @@ -77,39 +77,39 @@ class SMC_KERNEL(ABC): During each sampling stage the following methods are called in order: update_beta_and_weights : default - The inverse temperature self.beta is updated based on the self.likelihood_logp - and `threshold` parameter + The inverse temperature self.beta is updated based on the `self.likelihood_logp` + and `threshold` parameter. - The importance self.weights of each particle are computed from the old and newly - selected inverse temperature + The importance `self.weights` of each particle are computed from the old and newly + selected inverse temperature. The iteration number stored in `self.iteration` is updated by this method. - Finally the model log_marginal_likelihood of the tempered posterior - is updated from these weights + Finally the model `log_marginal_likelihood` of the tempered posterior + is updated from these weights. resample : default - The particles in self.posterior are sampled with replacement based - on self.weights, and the used resampling indexes are saved in + The particles in `self.posterior` are sampled with replacement based + on `self.weights`, and the used resampling indexes are saved in `self.resampling_indexes`. - The arrays self.prior_logp, self.likelihood_logp are rearranged according - to the order of the resampled particles. self.tempered_posterior_logp - is computed from these and the current self.beta + The arrays `self.prior_logp` and `self.likelihood_logp` are rearranged according + to the order of the resampled particles. `self.tempered_posterior_logp` + is computed from these and the current `self.beta`. tune : optional - May include logic that should be performed before every mutation step + May include logic that should be performed before every mutation step. mutate : REQUIRED - Mutate particles in self.tempered_posterior + Mutate particles in `self.tempered_posterior`. - This method is further responsible to update the self.prior_logp, - self.likelihod_logp and self.tempered_posterior_logp, corresponding - to each mutated particle + This method is further responsible to update the `self.prior_logp`, + `self.likelihod_logp` and `self.tempered_posterior_logp`, corresponding + to each mutated particle. sample_stats : default Returns important sampling_stats at the end of each stage in a dictionary - format. This will be saved in the final InferenceData objcet under `sample_stats`. + format. This will be saved in the final InferenceData object under `sample_stats`. 
Finally, at the end of sampling the following methods are called: @@ -117,9 +117,9 @@ class SMC_KERNEL(ABC): Convert final population of particles to a posterior trace object. This method should not be modified. - sample_settings : default: + sample_settings : default Returns important sample_settings at the end of sampling in a dictionary - format. This will be saved in the final InferenceData objcet under `sample_stats`. + format. This will be saved in the final InferenceData object under `sample_stats`. """ @@ -142,8 +142,8 @@ def __init__( Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. model : Model (optional if in ``with`` context). - random_seed : {None, int, array_like[ints]} - Value used to initialize the random number generator. + random_seed : int, array_like of int, RandomState or Generator, optional + Random seed(s) used by the sampling steps. threshold : float, default = 0.5 Determines the change of beta from stage to stage, i.e.indirectly the number of stages, the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. @@ -199,7 +199,7 @@ def initialize_population(self) -> Dict[str, np.ndarray]: return cast(Dict[str, np.ndarray], dict_prior) def _initialize_kernel(self): - """Create variables and logp function necessary to run kernel + """Create variables and logp function necessary to run SMC_kernel This method should not be overwritten. If needed, use `setup_kernel` instead. @@ -301,7 +301,7 @@ def mutate(self): def sample_stats(self) -> Dict: """Stats to be saved at the end of each stage - These stats will be saved under `sample_stats` in the final InferenceData. + These stats will be saved under `sample_stats` in the final InferenceData object. """ return { "log_marginal_likelihood": self.log_marginal_likelihood if self.beta == 1 else np.nan, @@ -309,9 +309,9 @@ def sample_stats(self) -> Dict: } def sample_settings(self) -> Dict: - """Kernel settings to be saved once at the end of sampling + """SMC_kernel settings to be saved once at the end of sampling. - These stats will be saved under `sample_stats` in the final InferenceData. + These stats will be saved under `sample_stats` in the final InferenceData object. """ return { @@ -347,15 +347,18 @@ def _posterior_to_trace(self, chain=0) -> NDArray: class IMH(SMC_KERNEL): - """Independent Metropolis-Hastings SMC kernel""" + """Independent Metropolis-Hastings SMC_kernel""" def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ Parameters ---------- correlation_threshold : float, default = 0.01 - The lower the value the higher the number of IMH steps computed automatically. + The lower the value, the higher the number of IMH steps computed automatically. Defaults to 0.01. It should be between 0 and 1. + **kwargs : dict, optional + Keyword arguments passed to the SMC_kernel. + """ super().__init__(*args, **kwargs) self.correlation_threshold = correlation_threshold @@ -449,15 +452,17 @@ def get(self, b): class MH(SMC_KERNEL): - """Metropolis-Hastings SMC kernel""" + """Metropolis-Hastings SMC_kernel""" def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ Parameters ---------- correlation_threshold : float, default = 0.01 - The lower the value the higher the number of MH steps computed automatically. + The lower the value, the higher the number of MH steps computed automatically. Defaults to 0.01. It should be between 0 and 1. 
+ **kwargs : dict, optional + Keyword arguments passed to the SMC_kernel. """ super().__init__(*args, **kwargs) self.correlation_threshold = correlation_threshold @@ -468,7 +473,7 @@ def __init__(self, *args, correlation_threshold=0.01, **kwargs): def setup_kernel(self): """Proposal dist is just a Multivariate Normal with unit identity covariance. - Dimension specific scaling is provided by self.proposal_scales and set in self.tune() + Dimension specific scaling is provided by `self.proposal_scales` and set in `self.tune()` """ ndim = self.tempered_posterior.shape[1] self.proposal_scales = np.full(self.draws, min(1, 2.38**2 / ndim)) @@ -586,11 +591,11 @@ def _logp_forw(point, out_vars, in_vars, shared): Parameters ---------- out_vars : list - containing :class:`pymc.Distribution` for the output variables + containing Distribution for the output variables in_vars : list - containing :class:`pymc.Distribution` for the input variables + containing Distribution for the input variables shared : list - containing :class:`aesara.tensor.Tensor` for depended shared data + containing TensorVariable for depended shared data """ # Replace integer inputs with rounded float inputs diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index 4f0f0dd33..e4c273363 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -56,41 +56,43 @@ def sample_smc( Parameters ---------- - draws : int, default = 2000 + draws : int, default 2000 The number of samples to draw from the posterior (i.e. last stage). And also the number of independent chains. Defaults to 2000. - kernel : class, default = pm.smc.smc.IMH - SMC Kernel used. Defaults to pm.smc.IMH (Independent Metropolis Hastings) - start : dict, or array of dict, default = None + kernel : class, default `pymc.smc.smc.IMH` + SMC_Kernel used. Defaults to :class:`pymc.smc.smc.IMH` (Independent Metropolis Hastings) + start : dict, or array of dict, default None Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. - model : Model (optional if in ``with`` context)). - random_seed : {None, int, array_like[ints]} - Value used to initialize the random number generator. - chains : int, default = None + model : Model (optional if in ``with`` context). + random_seed : int, array_like of int, RandomState or Generator, optional + Random seed(s) used by the sampling steps. If a list, tuple or array of ints + is passed, each entry will be used to seed each chain. A ValueError will be + raised if the length does not match the number of chains. + chains : int, default None The number of chains to sample. Running independent chains is important for some convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever is larger. - cores : int, default = None + cores : int, default None The number of chains to run in parallel. If ``None``, set to the number of CPUs in the system. - compute_convergence_checks : bool, default = True + compute_convergence_checks : bool, default True Whether to compute sampler statistics like ``R hat`` and ``effective_n``. Defaults to ``True``. - return_inferencedata : bool, default = True - Whether to return the trace as an :class:`arviz:arviz.InferenceData` (True) object or a `MultiTrace` (False) + return_inferencedata : bool, default True + Whether to return the trace as an InferenceData (True) object or a MultiTrace (False). Defaults to ``True``. 
idata_kwargs : dict, optional - Keyword arguments for :func:`pymc.to_inference_data` - progressbar : bool, optional, default = True + Keyword arguments for :func:`pymc.to_inference_data`. + progressbar : bool, optional, default True Whether or not to display a progress bar in the command line. - **kernel_kwargs : keyword arguments passed to the SMC kernel. - The default IMH kernel takes the following keywords: - threshold : float, default = 0.5 + **kernel_kwargs : dict, optional + Keyword arguments passed to the SMC_kernel. The default IMH kernel takes the following keywords: + threshold : float, default 0.5 Determines the change of beta from stage to stage, i.e. indirectly the number of stages, the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. It should be between 0 and 1. - correlation_threshold : float, default = 0.01 + correlation_threshold : float, default 0.01 The lower the value the higher the number of MCMC steps computed automatically. Defaults to 0.01. It should be between 0 and 1. Keyword arguments for other kernels should be checked in the respective docstrings. @@ -112,7 +114,7 @@ def sample_smc( 1. Initialize :math:`\beta` at zero and stage at zero. 2. Generate N samples :math:`S_{\beta}` from the prior (because when :math `\beta = 0` the tempered posterior is the prior). - 3. Increase :math:`\beta` in order to make the effective sample size equals some predefined + 3. Increase :math:`\beta` in order to make the effective sample size equal some predefined value (we use :math:`Nt`, where :math:`t` is 0.5 by default). 4. Compute a set of N importance weights W. The weights are computed as the ratio of the likelihoods of a sample at stage i+1 and stage i. @@ -152,7 +154,7 @@ def sample_smc( if kernel_kwargs.pop("save_sim_data", None) is not None: warnings.warn( - "save_sim_data has been deprecated. Use pm.sample_posterior_predictive " + "Save_sim_data has been deprecated. Use pm.sample_posterior_predictive " "to obtain the same type of samples.", FutureWarning, stacklevel=2, @@ -160,7 +162,7 @@ def sample_smc( if kernel_kwargs.pop("save_log_pseudolikelihood", None) is not None: warnings.warn( - "save_log_pseudolikelihood has been deprecated. This information is " + "Save_log_pseudolikelihood has been deprecated. This information is " "now saved as log_likelihood in models with Simulator distributions.", FutureWarning, stacklevel=2, From 195767d12b7869ca853f2d7007da0da7361254d0 Mon Sep 17 00:00:00 2001 From: Rowan Schaefer Date: Sun, 4 Sep 2022 13:40:46 -0400 Subject: [PATCH 03/12] fixed typo --- pymc/smc/kernels.py | 25 ++++++++++++++----------- pymc/smc/sampling.py | 6 +++--- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index 444419747..ce3b41238 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -135,16 +135,16 @@ def __init__( Parameters ---------- - draws : int, default = 2000 + draws : int, default 2000 The number of samples to draw from the posterior (i.e. last stage). Also the number of independent chains. Defaults to 2000. - start : dict, or array of dict, default = None + start : dict, or array of dict, default None Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. model : Model (optional if in ``with`` context). random_seed : int, array_like of int, RandomState or Generator, optional - Random seed(s) used by the sampling steps. 
- threshold : float, default = 0.5 + Value used to initialize the random number generator. + threshold : float, default 0.5 Determines the change of beta from stage to stage, i.e.indirectly the number of stages, the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. It should be between 0 and 1. @@ -353,11 +353,12 @@ def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ Parameters ---------- - correlation_threshold : float, default = 0.01 + correlation_threshold : float, default 0.01 The lower the value, the higher the number of IMH steps computed automatically. Defaults to 0.01. It should be between 0 and 1. **kwargs : dict, optional - Keyword arguments passed to the SMC_kernel. + Keyword arguments passed to the SMC_kernel. Refer to SMC_kernel documentation for a + list of all possible arguments. """ super().__init__(*args, **kwargs) @@ -458,11 +459,13 @@ def __init__(self, *args, correlation_threshold=0.01, **kwargs): """ Parameters ---------- - correlation_threshold : float, default = 0.01 + correlation_threshold : float, default 0.01 The lower the value, the higher the number of MH steps computed automatically. Defaults to 0.01. It should be between 0 and 1. **kwargs : dict, optional - Keyword arguments passed to the SMC_kernel. + Keyword arguments passed to the SMC_kernel. Refer to SMC_kernel documentation for a + list of all possible arguments. + """ super().__init__(*args, **kwargs) self.correlation_threshold = correlation_threshold @@ -591,11 +594,11 @@ def _logp_forw(point, out_vars, in_vars, shared): Parameters ---------- out_vars : list - containing Distribution for the output variables + Containing Distribution for the output variables in_vars : list - containing Distribution for the input variables + Containing Distribution for the input variables shared : list - containing TensorVariable for depended shared data + Containing TensorVariable for depended shared data """ # Replace integer inputs with rounded float inputs diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index e4c273363..63591a841 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -102,7 +102,7 @@ def sample_smc( SMC works by moving through successive stages. At each stage the inverse temperature :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0 we have the prior distribution and when :math:`\beta` = 1 we have the posterior distribution. - In general terms, we are always computing samples from a tempered posterior that we can + So in more general terms, we are always computing samples from a tempered posterior that we can write as: .. math:: @@ -154,7 +154,7 @@ def sample_smc( if kernel_kwargs.pop("save_sim_data", None) is not None: warnings.warn( - "Save_sim_data has been deprecated. Use pm.sample_posterior_predictive " + "save_sim_data has been deprecated. Use pm.sample_posterior_predictive " "to obtain the same type of samples.", FutureWarning, stacklevel=2, @@ -162,7 +162,7 @@ def sample_smc( if kernel_kwargs.pop("save_log_pseudolikelihood", None) is not None: warnings.warn( - "Save_log_pseudolikelihood has been deprecated. This information is " + "save_log_pseudolikelihood has been deprecated. 
This information is " "now saved as log_likelihood in models with Simulator distributions.", FutureWarning, stacklevel=2, From ca632370a0e54b3de17eb65431934edf946f4330 Mon Sep 17 00:00:00 2001 From: Rowan <70077168+rowangayleschaefer@users.noreply.github.com> Date: Tue, 13 Sep 2022 17:49:58 -0400 Subject: [PATCH 04/12] Update pymc/smc/sample_smc.py Co-authored-by: Oriol Abril-Pla --- pymc/smc/sampling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index 63591a841..63763df1a 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -59,8 +59,8 @@ def sample_smc( draws : int, default 2000 The number of samples to draw from the posterior (i.e. last stage). And also the number of independent chains. Defaults to 2000. - kernel : class, default `pymc.smc.smc.IMH` - SMC_Kernel used. Defaults to :class:`pymc.smc.smc.IMH` (Independent Metropolis Hastings) + kernel : SMC_kernel, optional + SMC kernel used. Defaults to :class:`pymc.smc.smc.IMH` (Independent Metropolis Hastings) start : dict, or array of dict, default None Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. From 6f06d31301327f6ae7f20a99dc99e57ea0853407 Mon Sep 17 00:00:00 2001 From: Rowan <70077168+rowangayleschaefer@users.noreply.github.com> Date: Tue, 13 Sep 2022 17:50:13 -0400 Subject: [PATCH 05/12] Update pymc/smc/sample_smc.py Co-authored-by: Oriol Abril-Pla --- pymc/smc/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index 63763df1a..c69a45472 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -69,7 +69,7 @@ def sample_smc( Random seed(s) used by the sampling steps. If a list, tuple or array of ints is passed, each entry will be used to seed each chain. A ValueError will be raised if the length does not match the number of chains. - chains : int, default None + chains : int, optional The number of chains to sample. Running independent chains is important for some convergence statistics. If ``None`` (default), then set to either ``cores`` or 2, whichever is larger. From a7c07457ba8bd2edf81b063a1dcc9d2e087ec8c2 Mon Sep 17 00:00:00 2001 From: Rowan <70077168+rowangayleschaefer@users.noreply.github.com> Date: Tue, 13 Sep 2022 17:51:00 -0400 Subject: [PATCH 06/12] Update pymc/smc/sample_smc.py Co-authored-by: Oriol Abril-Pla --- pymc/smc/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index c69a45472..d7cc0cdce 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -61,7 +61,7 @@ def sample_smc( independent chains. Defaults to 2000. kernel : SMC_kernel, optional SMC kernel used. Defaults to :class:`pymc.smc.smc.IMH` (Independent Metropolis Hastings) - start : dict, or array of dict, default None + start : dict or array of dict, optional Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. model : Model (optional if in ``with`` context). 
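The ``sample_smc`` signature documented in the hunks above can be exercised with a minimal call along these lines (toy model, data, and argument values are illustrative only, not part of these patches; ``threshold`` and ``correlation_threshold`` are forwarded to the default IMH kernel through ``**kernel_kwargs``):

    import numpy as np
    import pymc as pm

    rng = np.random.default_rng(0)
    observed = rng.normal(loc=0.5, scale=1.0, size=100)   # toy data

    with pm.Model():
        mu = pm.Normal("mu", mu=0.0, sigma=10.0)
        pm.Normal("obs", mu=mu, sigma=1.0, observed=observed)
        idata = pm.sample_smc(
            draws=1000,                  # particles, and samples from the last stage
            chains=2,
            random_seed=123,
            threshold=0.5,               # passed through to the IMH kernel
            correlation_threshold=0.01,  # passed through to the IMH kernel
        )

Because the kernel keywords ride along in ``**kernel_kwargs``, switching to the MH kernel only requires adding ``kernel=pm.smc.MH`` to the same call.
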
From 041d6f5beec75279b9a86e3cfd9aba61b5dcd178 Mon Sep 17 00:00:00 2001 From: Rowan <70077168+rowangayleschaefer@users.noreply.github.com> Date: Tue, 13 Sep 2022 17:51:38 -0400 Subject: [PATCH 07/12] Update pymc/smc/sample_smc.py Co-authored-by: Oriol Abril-Pla --- pymc/smc/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index d7cc0cdce..035fb8934 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -101,7 +101,7 @@ def sample_smc( ----- SMC works by moving through successive stages. At each stage the inverse temperature :math:`\beta` is increased a little bit (starting from 0 up to 1). When :math:`\beta` = 0 - we have the prior distribution and when :math:`\beta` = 1 we have the posterior distribution. + we have the prior distribution and when :math:`\beta = 1` we have the posterior distribution. So in more general terms, we are always computing samples from a tempered posterior that we can write as: From dab7142380f2ae5b830b55bbb9b844a228438e7a Mon Sep 17 00:00:00 2001 From: Rowan <70077168+rowangayleschaefer@users.noreply.github.com> Date: Thu, 13 Oct 2022 11:48:56 -0400 Subject: [PATCH 08/12] Update pymc/smc/sample_smc.py Co-authored-by: Oriol Abril-Pla --- pymc/smc/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index 035fb8934..bf03bcd6c 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -65,7 +65,7 @@ def sample_smc( Starting point in parameter space. It should be a list of dict with length `chains`. When None (default) the starting point is sampled from the prior distribution. model : Model (optional if in ``with`` context). - random_seed : int, array_like of int, RandomState or Generator, optional + random_seed : int, array_like of int, RandomState or numpy_Generator, optional Random seed(s) used by the sampling steps. If a list, tuple or array of ints is passed, each entry will be used to seed each chain. A ValueError will be raised if the length does not match the number of chains. From f01be40ebdd6011dfb5911f5f6c9b4e5607255c6 Mon Sep 17 00:00:00 2001 From: Rowan <70077168+rowangayleschaefer@users.noreply.github.com> Date: Thu, 13 Oct 2022 11:56:59 -0400 Subject: [PATCH 09/12] Update pymc/smc/sample_smc.py Co-authored-by: Oriol Abril-Pla --- pymc/smc/sampling.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index bf03bcd6c..d7ba6b0e3 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -88,10 +88,11 @@ def sample_smc( Whether or not to display a progress bar in the command line. **kernel_kwargs : dict, optional Keyword arguments passed to the SMC_kernel. The default IMH kernel takes the following keywords: - threshold : float, default 0.5 - Determines the change of beta from stage to stage, i.e. indirectly the number of stages, - the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. - It should be between 0 and 1. + + threshold : float, default 0.5 + Determines the change of beta from stage to stage, i.e. indirectly the number of stages, + the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. + It should be between 0 and 1. correlation_threshold : float, default 0.01 The lower the value the higher the number of MCMC steps computed automatically. Defaults to 0.01. It should be between 0 and 1. 
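The ``threshold`` keyword described in the hunk above sets the effective-sample-size target used when raising the inverse temperature beta from stage to stage. A rough standalone sketch of that update (a simplified NumPy/SciPy illustration of the idea, not the code modified by these patches; the helper name ``update_beta`` is made up here):

    import numpy as np
    from scipy.special import logsumexp

    def update_beta(likelihood_logp, old_beta, threshold=0.5):
        # Bisect for the largest new_beta whose importance weights,
        # proportional to exp((new_beta - old_beta) * loglike_i), still keep
        # an effective sample size of at least threshold * n_particles.
        n = len(likelihood_logp)
        low, high = old_beta, 2.0
        new_beta = high
        while high - low > 1e-6:
            new_beta = (low + high) / 2.0
            log_w = (new_beta - old_beta) * likelihood_logp
            log_w -= logsumexp(log_w)                # normalize weights
            ess = np.exp(-logsumexp(2 * log_w))      # 1 / sum(w_i ** 2)
            if ess > threshold * n:
                low = new_beta                       # can temper further
            else:
                high = new_beta
        new_beta = min(1.0, new_beta)                # beta never exceeds 1
        log_w = (new_beta - old_beta) * likelihood_logp
        return new_beta, np.exp(log_w - logsumexp(log_w))

With ``threshold=0.5`` and 2000 draws, each stage targets an effective sample size of roughly 1000; larger threshold values force smaller beta increments and therefore more stages, which is the behaviour the docstring text describes.
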
From aaf4a11ab45a686d4f23ddc16d19b38719b1b6c4 Mon Sep 17 00:00:00 2001 From: Rowan <70077168+rowangayleschaefer@users.noreply.github.com> Date: Thu, 13 Oct 2022 11:59:43 -0400 Subject: [PATCH 10/12] Update pymc/smc/smc.py Co-authored-by: Oriol Abril-Pla --- pymc/smc/kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index ce3b41238..937036679 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -199,7 +199,7 @@ def initialize_population(self) -> Dict[str, np.ndarray]: return cast(Dict[str, np.ndarray], dict_prior) def _initialize_kernel(self): - """Create variables and logp function necessary to run SMC_kernel + """Create variables and logp function necessary to run SMC kernel This method should not be overwritten. If needed, use `setup_kernel` instead. From b0bcd1d033fac435154fc78ac2bd1da58955ffa8 Mon Sep 17 00:00:00 2001 From: Rowan Date: Thu, 13 Oct 2022 16:42:04 +0000 Subject: [PATCH 11/12] Made changes to smc.py and sample_smc.py for pr#6114 --- docs/source/api/smc.rst | 3 ++- pymc/smc/kernels.py | 8 +++++++- pymc/variational/inference.py | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/docs/source/api/smc.rst b/docs/source/api/smc.rst index 55f228da2..0627d69c8 100644 --- a/docs/source/api/smc.rst +++ b/docs/source/api/smc.rst @@ -8,7 +8,8 @@ Sequential Monte Carlo sample_smc -(smc_kernels)= +.. _smc_kernels: + SMC kernels ----------- diff --git a/pymc/smc/kernels.py b/pymc/smc/kernels.py index 937036679..a19507cac 100644 --- a/pymc/smc/kernels.py +++ b/pymc/smc/kernels.py @@ -43,7 +43,7 @@ class SMC_KERNEL(ABC): """Base class for the Sequential Monte Carlo kernels. - To create a new kernel you should subclass from this. + To create a new SMC kernel you should subclass from this. Before sampling, the following methods are called once in order: @@ -132,6 +132,7 @@ def __init__( threshold=0.5, ): """ + Initialize the SMC_kernel class. Parameters ---------- @@ -149,6 +150,11 @@ def __init__( the higher the value of `threshold` the higher the number of stages. Defaults to 0.5. It should be between 0 and 1. + Attributes + ---------- + self.var_info : dict + Dictionary that contains information about model variables shape and size. + """ self.draws = draws diff --git a/pymc/variational/inference.py b/pymc/variational/inference.py index b2f8a4a0b..94bc223b3 100644 --- a/pymc/variational/inference.py +++ b/pymc/variational/inference.py @@ -49,12 +49,12 @@ class Inference: Parameters ---------- - op: Operator class - approx: Approximation class or instance - tf: TestFunction instance - model: Model + op : Operator class #:class:`~pymc.variational.operators` + approx : Approximation class or instance #:class:`~pymc.variational.approximations` + tf : TestFunction instance #? 
+ model : Model PyMC Model - kwargs: kwargs passed to :class:`Operator` + kwargs : kwargs passed to :class:`Operator` #:class:`~pymc.variational.operators`, optional """ def __init__(self, op, approx, tf, **kwargs): @@ -96,18 +96,18 @@ def fit(self, n=10000, score=None, callbacks=None, progressbar=True, **kwargs): Parameters ---------- - n: int + n : int number of iterations - score: bool + score : bool evaluate loss on each iteration or not - callbacks: list[function: (Approximation, losses, i) -> None] + callbacks : list[function: (Approximation, losses, i) -> None] calls provided functions after each iteration step - progressbar: bool + progressbar : bool whether to show progressbar or not Other Parameters ---------------- - obj_n_mc: `int` + obj_n_mc: int Number of monte carlo samples used for approximation of objective gradients tf_n_mc: `int` Number of monte carlo samples used for approximation of test function gradients From 9b727246edc159feaa2f17c272b6211947bbc5de Mon Sep 17 00:00:00 2001 From: Michael Osthege Date: Sat, 19 Nov 2022 14:16:42 +0100 Subject: [PATCH 12/12] Fix pre-commit --- pymc/smc/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/smc/sampling.py b/pymc/smc/sampling.py index d7ba6b0e3..0cd5c39cc 100644 --- a/pymc/smc/sampling.py +++ b/pymc/smc/sampling.py @@ -88,7 +88,7 @@ def sample_smc( Whether or not to display a progress bar in the command line. **kernel_kwargs : dict, optional Keyword arguments passed to the SMC_kernel. The default IMH kernel takes the following keywords: - + threshold : float, default 0.5 Determines the change of beta from stage to stage, i.e. indirectly the number of stages, the higher the value of `threshold` the higher the number of stages. Defaults to 0.5.