From 3967c02f1769577b649c34a84c9aafd05d0de058 Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Fri, 8 Apr 2022 10:16:49 -0400 Subject: [PATCH 1/6] maybe clip poisson rates --- src/pyhf/pdf.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pyhf/pdf.py b/src/pyhf/pdf.py index 8ca28427ac..a7f58a9e9d 100644 --- a/src/pyhf/pdf.py +++ b/src/pyhf/pdf.py @@ -545,7 +545,9 @@ def has_pdf(self): return True def make_pdf(self, pars): + tensorlib, _ = get_backend() lambdas_data = self.expected_data(pars) + lambdas_data = tensorlib.clip(lambdas_data, 1e-6, max_value=None) return prob.Independent(prob.Poisson(lambdas_data)) def logpdf(self, maindata, pars): From 59e4a6ede51fd16b48a6d3f454aa308df88cb1d4 Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Wed, 20 Apr 2022 12:32:18 -0700 Subject: [PATCH 2/6] update to allow for configurable clipping via model --- src/pyhf/pdf.py | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/src/pyhf/pdf.py b/src/pyhf/pdf.py index a7f58a9e9d..dddc6637c0 100644 --- a/src/pyhf/pdf.py +++ b/src/pyhf/pdf.py @@ -2,7 +2,7 @@ import copy import logging -from typing import List +from typing import List, Union import pyhf.parameters import pyhf @@ -506,7 +506,15 @@ def logpdf(self, auxdata, pars): class _MainModel: """Factory class to create pdfs for the main measurement.""" - def __init__(self, config, modifiers, nominal_rates, batch_size=None): + def __init__( + self, + config, + modifiers, + nominal_rates, + batch_size=None, + clip_sample_data: Union[float, None] = None, + clip_bin_data: Union[float, None] = None, + ): default_backend = pyhf.default_backend self.config = config @@ -515,6 +523,9 @@ def __init__(self, config, modifiers, nominal_rates, batch_size=None): self._delta_mods = [] self.batch_size = batch_size + self.clip_sample_data = clip_sample_data + self.clip_bin_data = clip_bin_data + self._nominal_rates = default_backend.tile( nominal_rates, (1, 1, self.batch_size or 1, 1) ) @@ -547,7 +558,6 @@ def has_pdf(self): def make_pdf(self, pars): tensorlib, _ = get_backend() lambdas_data = self.expected_data(pars) - lambdas_data = tensorlib.clip(lambdas_data, 1e-6, max_value=None) return prob.Independent(prob.Poisson(lambdas_data)) def logpdf(self, maindata, pars): @@ -621,6 +631,11 @@ def expected_data(self, pars, return_by_sample=False): allfac = tensorlib.concatenate(factors + [nom_plus_delta]) newbysample = tensorlib.product(allfac, axis=0) + if self.clip_sample_data: + newbysample = tensorlib.clip( + newbysample, self.clip_sample_data, max_value=None + ) + if return_by_sample: batch_first = tensorlib.einsum('ij...->ji...', newbysample) if self.batch_size is None: @@ -628,6 +643,9 @@ def expected_data(self, pars, return_by_sample=False): return batch_first newresults = tensorlib.sum(newbysample, axis=0) + if self.clip_bin_data: + newresults = tensorlib.clip(newresults, self.clip_bin_data, max_value=None) + if self.batch_size is None: return newresults[0] return newresults @@ -642,6 +660,8 @@ def __init__( modifier_set=None, batch_size=None, validate: bool = True, + clip_sample_data: Union[float, None] = None, + clip_bin_data: Union[float, None] = None, **config_kwargs, ): """ @@ -652,6 +672,8 @@ def __init__( batch_size (:obj:`None` or :obj:`int`): Number of simultaneous (batched) Models to compute. validate (:obj:`bool`): Whether to validate against a JSON schema + clip_sample_data (:obj:`None` or :obj:`float`): Clip the minimum value of expected data by-sample. Default is no clipping. + clip_bin_data (:obj:`None` or :obj:`float`): Clip the minimum value of expected data by-bin. Default is no clipping. config_kwargs: Possible keyword arguments for the model configuration Returns: @@ -686,6 +708,8 @@ def __init__( modifiers=modifiers, nominal_rates=_nominal_rates, batch_size=self.batch_size, + clip_sample_data=clip_sample_data, + clip_bin_data=clip_bin_data, ) # the below call needs auxdata order for example From 665764dea9c0fca7aaf5d16b67fc7836b1977372 Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Wed, 20 Apr 2022 12:36:10 -0700 Subject: [PATCH 3/6] fix clipping --- src/pyhf/pdf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pyhf/pdf.py b/src/pyhf/pdf.py index dddc6637c0..18f7314d17 100644 --- a/src/pyhf/pdf.py +++ b/src/pyhf/pdf.py @@ -631,7 +631,7 @@ def expected_data(self, pars, return_by_sample=False): allfac = tensorlib.concatenate(factors + [nom_plus_delta]) newbysample = tensorlib.product(allfac, axis=0) - if self.clip_sample_data: + if self.clip_sample_data is not None: newbysample = tensorlib.clip( newbysample, self.clip_sample_data, max_value=None ) @@ -643,7 +643,7 @@ def expected_data(self, pars, return_by_sample=False): return batch_first newresults = tensorlib.sum(newbysample, axis=0) - if self.clip_bin_data: + if self.clip_bin_data is not None: newresults = tensorlib.clip(newresults, self.clip_bin_data, max_value=None) if self.batch_size is None: From 5fbf39d8b32b2b4fe247fcc3c402c4cafd6ad86f Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Wed, 20 Apr 2022 13:05:10 -0700 Subject: [PATCH 4/6] add test for clipping --- tests/test_pdf.py | 112 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/tests/test_pdf.py b/tests/test_pdf.py index 25df078972..9e624a8268 100644 --- a/tests/test_pdf.py +++ b/tests/test_pdf.py @@ -1070,3 +1070,115 @@ def make_model( pyhf.tensorlib.astensor(1.05), pyhf.tensorlib.astensor([5.0, 5.0]), ) + + +def test_pdf_clipping(backend): + tensorlib, optimizer = pyhf.get_backend() + + spec = { + "channels": [ + { + "name": "ch1", + "samples": [ + { + "data": [100.0, 100.0], + "modifiers": [ + {"data": None, "name": "mu_sig", "type": "normfactor"}, + { + "data": { + "hi_data": [125.0, 75.0], + "lo_data": [175.0, 10.0], + }, + "name": "np_1", + "type": "histosys", + }, + { + "data": { + "hi_data": [125.0, 75.0], + "lo_data": [175.0, 10.0], + }, + "name": "np_2", + "type": "histosys", + }, + ], + "name": "signal", + } + ], + }, + { + "name": "ch2", + "samples": [ + { + "data": [15000.0], + "modifiers": [ + {"data": None, "name": "mu_sig", "type": "normfactor"}, + { + "data": {"hi_data": [15500.0], "lo_data": [1200.0]}, + "name": "np_1", + "type": "histosys", + }, + { + "data": {"hi_data": [15500.0], "lo_data": [1200.0]}, + "name": "np_2", + "type": "histosys", + }, + ], + "name": "signal", + } + ], + }, + ], + "measurements": [ + {"config": {"parameters": [], "poi": "mu_sig"}, "name": "meas"} + ], + "observations": [ + {"data": [100], "name": "ch1"}, + {"data": [1000], "name": "ch2"}, + ], + "version": "1.0.0", + } + + par_values = [] + + ws = pyhf.Workspace(spec) + model = ws.model() + data = tensorlib.astensor([100.0, 100.0, 10.0, 0.0, 0.0]) + + for par_name in model.config.par_names(): + if "np" in par_name: + par_values.append(-0.6) # np_1 / np_2 + else: + par_values.append(1.0) # mu + + pars = tensorlib.astensor(par_values) + + # Check with no clipping + assert any(value < 0 for value in model.expected_actualdata(pars)) + assert tensorlib.tolist(model.expected_actualdata(pars)) == pytest.approx( + [1.830496e02, -7.040000e-03, -6.355968e02], abs=1e-3 + ) + + # Check with clipping by-sample + model_clip_sample = ws.model(clip_sample_data=0.0) + assert all(value >= 0 for value in model_clip_sample.expected_actualdata(pars)) + assert tensorlib.tolist( + model_clip_sample.expected_actualdata(pars) + ) == pytest.approx([1.830496e02, 0.0, 0.0], abs=1e-3) + + # Check with clipping by-bin + model_clip_bin = ws.model(clip_bin_data=0.0) + assert all(value >= 0 for value in model_clip_bin.expected_actualdata(pars)) + assert tensorlib.tolist(model_clip_bin.expected_actualdata(pars)) == pytest.approx( + [1.830496e02, 0.0, 0.0], abs=1e-3 + ) + + # Minuit cannot handle negative yields, confirm that MLE fails for minuit specifically + if optimizer.name == 'minuit': + with pytest.raises(pyhf.exceptions.FailedMinimization): + pyhf.infer.mle.fit(data, model) + else: + pyhf.infer.mle.fit(data, model) + + # We should be able to converge when clipping is enabled + pyhf.infer.mle.fit(data, model_clip_sample) + pyhf.infer.mle.fit(data, model_clip_bin) From eec1b82544799968e94217a108a982bb87a83ae7 Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Wed, 20 Apr 2022 13:16:26 -0700 Subject: [PATCH 5/6] drop extraneous tensorlib calls --- src/pyhf/pdf.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/pyhf/pdf.py b/src/pyhf/pdf.py index 18f7314d17..9f1361b31b 100644 --- a/src/pyhf/pdf.py +++ b/src/pyhf/pdf.py @@ -556,7 +556,6 @@ def has_pdf(self): return True def make_pdf(self, pars): - tensorlib, _ = get_backend() lambdas_data = self.expected_data(pars) return prob.Independent(prob.Poisson(lambdas_data)) @@ -820,8 +819,6 @@ def make_pdf(self, pars): pdf: A distribution object implementing the main measurement pdf of HistFactory """ - tensorlib, _ = get_backend() - pdfobjs = [] mainpdf = self.main_model.make_pdf(pars) if mainpdf: From c72af40aec622942f2ce30cc0979966645ba9e61 Mon Sep 17 00:00:00 2001 From: Giordon Stark Date: Tue, 21 Jun 2022 16:18:54 -0400 Subject: [PATCH 6/6] add warning --- src/pyhf/pdf.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/pyhf/pdf.py b/src/pyhf/pdf.py index 9f1361b31b..b145a89721 100644 --- a/src/pyhf/pdf.py +++ b/src/pyhf/pdf.py @@ -526,6 +526,14 @@ def __init__( self.clip_sample_data = clip_sample_data self.clip_bin_data = clip_bin_data + if self.clip_sample_data is not None: + log.warning( + f"Clipping expected data per-bin for each sample below {self.clip_sample_data}" + ) + + if self.clip_bin_data is not None: + log.warning(f"Clipping expected data per-bin below {self.clip_bin_data}") + self._nominal_rates = default_backend.tile( nominal_rates, (1, 1, self.batch_size or 1, 1) )