Merge branch 'pymc-devs:main' into aesaraf_join_shared_input_doc

williambdean · web-flow · commit 8360f8604254 · 2022-11-01T18:10:08.000-04:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -19,7 +19,7 @@ repos:
     - id: isort
       name: isort
 - repo: https://github.com/asottile/pyupgrade
-  rev: v3.1.0
+  rev: v3.2.0
   hooks:
     - id: pyupgrade
       args: [--py37-plus]
diff --git a/pymc/printing.py b/pymc/printing.py
@@ -58,7 +58,7 @@ def str_for_dist(
 
     if "latex" in formatting:
         if print_name is not None:
-            print_name = r"\text{" + _latex_escape(dist.name) + "}"
+            print_name = r"\text{" + _latex_escape(dist.name.strip("$")) + "}"
 
         op_name = (
             dist.owner.op._print_name[1]
@@ -67,9 +67,11 @@ def str_for_dist(
         )
         if include_params:
             if print_name:
-                return r"${} \sim {}({})$".format(print_name, op_name, ",~".join(dist_args))
+                return r"${} \sim {}({})$".format(
+                    print_name, op_name, ",~".join([d.strip("$") for d in dist_args])
+                )
             else:
-                return r"${}({})$".format(op_name, ",~".join(dist_args))
+                return r"${}({})$".format(op_name, ",~".join([d.strip("$") for d in dist_args]))
 
         else:
             if print_name:
@@ -138,7 +140,7 @@ def str_for_potential_or_deterministic(
     LaTeX or plain, optionally with distribution parameter values included."""
     print_name = var.name if var.name is not None else "<unnamed>"
     if "latex" in formatting:
-        print_name = r"\text{" + _latex_escape(print_name) + "}"
+        print_name = r"\text{" + _latex_escape(print_name.strip("$")) + "}"
         if include_params:
             return rf"${print_name} \sim \operatorname{{{dist_name}}}({_str_for_expression(var, formatting=formatting)})$"
         else:
@@ -182,7 +184,7 @@ def _str_for_input_rv(var: Variable, formatting: str) -> str:
         else str_for_dist(var, formatting=formatting, include_params=True)
     )
     if "latex" in formatting:
-        return r"\text{" + _latex_escape(_str) + "}"
+        return _latex_text_format(_latex_escape(_str.strip("$")))
     else:
         return _str
 
@@ -215,9 +217,20 @@ def _expand(x):
     names = [x.name for x in parents]
 
     if "latex" in formatting:
-        return r"f(" + ",~".join([r"\text{" + _latex_escape(n) + "}" for n in names]) + ")"
+        return (
+            r"f("
+            + ",~".join([_latex_text_format(_latex_escape(n.strip("$"))) for n in names])
+            + ")"
+        )
+    else:
+        return r"f(" + ", ".join([n.strip("$") for n in names]) + ")"
+
+
+def _latex_text_format(text: str) -> str:
+    if r"\operatorname{" in text:
+        return text
     else:
-        return r"f(" + ", ".join(names) + ")"
+        return r"\text{" + text + "}"
 
 
 def _latex_escape(text: str) -> str:
diff --git a/pymc/sampling.py b/pymc/sampling.py
@@ -2065,127 +2065,10 @@ def sample_posterior_predictive_w(
         weighted models (default), or a dictionary with variable names as keys, and samples as
         numpy arrays.
     """
-    raise NotImplementedError(f"sample_posterior_predictive_w has not yet been ported to PyMC 4.0.")
-
-    if isinstance(traces[0], InferenceData):
-        n_samples = [
-            trace.posterior.sizes["chain"] * trace.posterior.sizes["draw"] for trace in traces
-        ]
-        traces = [dataset_to_point_list(trace.posterior) for trace in traces]
-    elif isinstance(traces[0], xarray.Dataset):
-        n_samples = [trace.sizes["chain"] * trace.sizes["draw"] for trace in traces]
-        traces = [dataset_to_point_list(trace) for trace in traces]
-    else:
-        n_samples = [len(i) * i.nchains for i in traces]
-
-    if models is None:
-        models = [modelcontext(models)] * len(traces)
-
-    if random_seed is not None:
-        (random_seed,) = _get_seeds_per_chain(random_seed, 1)
-
-    for model in models:
-        if model.potentials:
-            warnings.warn(
-                "The effect of Potentials on other parameters is ignored during posterior predictive sampling. "
-                "This is likely to lead to invalid or biased predictive samples.",
-                UserWarning,
-                stacklevel=2,
-            )
-            break
-
-    if weights is None:
-        weights = [1] * len(traces)
-
-    if len(traces) != len(weights):
-        raise ValueError("The number of traces and weights should be the same")
-
-    if len(models) != len(weights):
-        raise ValueError("The number of models and weights should be the same")
-
-    length_morv = len(models[0].observed_RVs)
-    if any(len(i.observed_RVs) != length_morv for i in models):
-        raise ValueError("The number of observed RVs should be the same for all models")
-
-    weights = np.asarray(weights)
-    p = weights / np.sum(weights)
-
-    min_tr = min(n_samples)
-
-    n = (min_tr * p).astype("int")
-    # ensure n sum up to min_tr
-    idx = np.argmax(n)
-    n[idx] = n[idx] + min_tr - np.sum(n)
-    trace = []
-    for i, j in enumerate(n):
-        tr = traces[i]
-        len_trace = len(tr)
-        try:
-            nchain = tr.nchains
-        except AttributeError:
-            nchain = 1
-
-        indices = np.random.randint(0, nchain * len_trace, j)
-        if nchain > 1:
-            chain_idx, point_idx = np.divmod(indices, len_trace)
-            for cidx, pidx in zip(chain_idx, point_idx):
-                trace.append(tr._straces[cidx].point(pidx))
-        else:
-            for idx in indices:
-                trace.append(tr[idx])
-
-    obs = [x for m in models for x in m.observed_RVs]
-    variables = np.repeat(obs, n)
-
-    lengths = list({np.atleast_1d(observed).shape for observed in obs})
-
-    size: List[Optional[Tuple[int, ...]]] = []
-    if len(lengths) == 1:
-        size = [None] * len(variables)
-    elif len(lengths) > 2:
-        raise ValueError("Observed variables could not be broadcast together")
-    else:
-        x = np.zeros(shape=lengths[0])
-        y = np.zeros(shape=lengths[1])
-        b = np.broadcast(x, y)
-        for var in variables:
-            # XXX: This needs to be refactored
-            shape = None  # np.shape(np.atleast_1d(var.distribution.default()))
-            if shape != b.shape:
-                size.append(b.shape)
-            else:
-                size.append(None)
-    len_trace = len(trace)
-
-    if samples is None:
-        samples = len_trace
-
-    indices = np.random.randint(0, len_trace, samples)
-
-    if progressbar:
-        indices = progress_bar(indices, total=samples, display=progressbar)
-
-    try:
-        ppcl: Dict[str, list] = defaultdict(list)
-        for idx in indices:
-            param = trace[idx]
-            var = variables[idx]
-            # TODO sample_posterior_predictive_w is currently only work for model with
-            # one observed.
-            # XXX: This needs to be refactored
-            # ppc[var.name].append(draw_values([var], point=param, size=size[idx])[0])
-            raise NotImplementedError()
-
-    except KeyboardInterrupt:
-        pass
-    else:
-        ppcd = {k: np.asarray(v) for k, v in ppcl.items()}
-        if not return_inferencedata:
-            return ppcd
-        ikwargs: Dict[str, Any] = dict(model=models)
-        if idata_kwargs:
-            ikwargs.update(idata_kwargs)
-        return pm.to_inference_data(posterior_predictive=ppcd, **ikwargs)
+    raise FutureWarning(
+        "The function `sample_posterior_predictive_w` has been removed in PyMC 4.3.0. "
+        "Switch to `arviz.stats.weight_predictions`"
+    )
 
 
 def sample_prior_predictive(
diff --git a/pymc/tests/test_printing.py b/pymc/tests/test_printing.py
@@ -1,6 +1,6 @@
 import numpy as np
 
-from pymc import Bernoulli, Censored, Mixture
+from pymc import Bernoulli, Censored, HalfCauchy, Mixture, StudentT
 from pymc.aesaraf import floatX
 from pymc.distributions import (
     Dirichlet,
@@ -130,12 +130,12 @@ def setup_class(self):
                 r"$\text{beta} \sim \operatorname{N}(0,~10)$",
                 r"$\text{Z} \sim \operatorname{N}(f(),~f())$",
                 r"$\text{nb_with_p_n} \sim \operatorname{NB}(10,~\text{nbp})$",
-                r"$\text{zip} \sim \operatorname{MarginalMixture}(f(),~\text{\$\operatorname{DiracDelta}(0)\$},~\text{\$\operatorname{Pois}(5)\$})$",
+                r"$\text{zip} \sim \operatorname{MarginalMixture}(f(),~\operatorname{DiracDelta}(0),~\operatorname{Pois}(5))$",
                 r"$\text{w} \sim \operatorname{Dir}(\text{<constant>})$",
                 (
                     r"$\text{nested_mix} \sim \operatorname{MarginalMixture}(\text{w},"
-                    r"~\text{\$\operatorname{MarginalMixture}(f(),~\text{\\$\operatorname{DiracDelta}(0)\\$},~\text{\\$\operatorname{Pois}(5)\\$})\$},"
-                    r"~\text{\$\operatorname{Censored}(\text{\\$\operatorname{Bern}(0.5)\\$},~-1,~1)\$})$"
+                    r"~\operatorname{MarginalMixture}(f(),~\operatorname{DiracDelta}(0),~\operatorname{Pois}(5)),"
+                    r"~\operatorname{Censored}(\operatorname{Bern}(0.5),~-1,~1))$"
                 ),
                 r"$\text{Y_obs} \sim \operatorname{N}(\text{mu},~\text{sigma})$",
                 r"$\text{pot} \sim \operatorname{Potential}(f(\text{beta},~\text{alpha}))$",
@@ -178,3 +178,43 @@ def test_str_repr(self):
                         assert segment in model_text
                 else:
                     assert text in model_text
+
+
+def test_model_latex_repr_three_levels_model():
+    with Model() as censored_model:
+        mu = Normal("mu", 0.0, 5.0)
+        sigma = HalfCauchy("sigma", 2.5)
+        normal_dist = Normal.dist(mu=mu, sigma=sigma)
+        censored_normal = Censored(
+            "censored_normal", normal_dist, lower=-2.0, upper=2.0, observed=[1, 0, 0.5]
+        )
+
+    latex_repr = censored_model.str_repr(formatting="latex")
+    expected = [
+        "$$",
+        "\\begin{array}{rcl}",
+        "\\text{mu} &\\sim & \\operatorname{N}(0,~5)\\\\\\text{sigma} &\\sim & "
+        "\\operatorname{C^{+}}(0,~2.5)\\\\\\text{censored_normal} &\\sim & "
+        "\\operatorname{Censored}(\\operatorname{N}(\\text{mu},~\\text{sigma}),~-2,~2)",
+        "\\end{array}",
+        "$$",
+    ]
+    assert [line.strip() for line in latex_repr.split("\n")] == expected
+
+
+def test_model_latex_repr_mixture_model():
+    with Model() as mix_model:
+        w = Dirichlet("w", [1, 1])
+        mix = Mixture("mix", w=w, comp_dists=[Normal.dist(0.0, 5.0), StudentT.dist(7.0)])
+
+    latex_repr = mix_model.str_repr(formatting="latex")
+    expected = [
+        "$$",
+        "\\begin{array}{rcl}",
+        "\\text{w} &\\sim & "
+        "\\operatorname{Dir}(\\text{<constant>})\\\\\\text{mix} &\\sim & "
+        "\\operatorname{MarginalMixture}(\\text{w},~\\operatorname{N}(0,~5),~\\operatorname{StudentT}(7,~0,~1))",
+        "\\end{array}",
+        "$$",
+    ]
+    assert [line.strip() for line in latex_repr.split("\n")] == expected
diff --git a/pymc/tests/test_sampling.py b/pymc/tests/test_sampling.py
@@ -1177,71 +1177,6 @@ def test_logging_sampled_basic_rvs_posterior_mutable(self, mock_sample_results,
             caplog.clear()
 
 
-@pytest.mark.xfail(
-    reason="sample_posterior_predictive_w not refactored for v4", raises=NotImplementedError
-)
-class TestSamplePPCW(SeededTest):
-    def test_sample_posterior_predictive_w(self):
-        data0 = np.random.normal(0, 1, size=50)
-        warning_msg = "The number of samples is too small to check convergence reliably"
-
-        with pm.Model() as model_0:
-            mu = pm.Normal("mu", mu=0, sigma=1)
-            y = pm.Normal("y", mu=mu, sigma=1, observed=data0)
-            with pytest.warns(UserWarning, match=warning_msg):
-                trace_0 = pm.sample(10, tune=0, chains=2, return_inferencedata=False)
-            idata_0 = pm.to_inference_data(trace_0, log_likelihood=False)
-
-        with pm.Model() as model_1:
-            mu = pm.Normal("mu", mu=0, sigma=1, size=len(data0))
-            y = pm.Normal("y", mu=mu, sigma=1, observed=data0)
-            with pytest.warns(UserWarning, match=warning_msg):
-                trace_1 = pm.sample(10, tune=0, chains=2, return_inferencedata=False)
-            idata_1 = pm.to_inference_data(trace_1, log_likelihood=False)
-
-        with pm.Model() as model_2:
-            # Model with no observed RVs.
-            mu = pm.Normal("mu", mu=0, sigma=1)
-            with pytest.warns(UserWarning, match=warning_msg):
-                trace_2 = pm.sample(10, tune=0, return_inferencedata=False)
-
-        traces = [trace_0, trace_1]
-        idatas = [idata_0, idata_1]
-        models = [model_0, model_1]
-
-        ppc = pm.sample_posterior_predictive_w(traces, 100, models)
-        assert ppc["y"].shape == (100, 50)
-
-        ppc = pm.sample_posterior_predictive_w(idatas, 100, models)
-        assert ppc["y"].shape == (100, 50)
-
-        with model_0:
-            ppc = pm.sample_posterior_predictive_w([idata_0.posterior], None)
-            assert ppc["y"].shape == (20, 50)
-
-        with pytest.raises(ValueError, match="The number of traces and weights should be the same"):
-            pm.sample_posterior_predictive_w([idata_0.posterior], 100, models, weights=[0.5, 0.5])
-
-        with pytest.raises(ValueError, match="The number of models and weights should be the same"):
-            pm.sample_posterior_predictive_w([idata_0.posterior], 100, models)
-
-        with pytest.raises(
-            ValueError, match="The number of observed RVs should be the same for all models"
-        ):
-            pm.sample_posterior_predictive_w([trace_0, trace_2], 100, [model_0, model_2])
-
-    def test_potentials_warning(self):
-        warning_msg = "The effect of Potentials on other parameters is ignored during"
-        with pm.Model() as m:
-            a = pm.Normal("a", 0, 1)
-            p = pm.Potential("p", a + 1)
-            obs = pm.Normal("obs", a, 1, observed=5)
-
-        trace = az_from_dict({"a": np.random.rand(10)})
-        with pytest.warns(UserWarning, match=warning_msg):
-            pm.sample_posterior_predictive_w(samples=5, traces=[trace, trace], models=[m, m])
-
-
 def check_exec_nuts_init(method):
     with pm.Model() as model:
         pm.Normal("a", mu=0, sigma=1, size=2)