src/pyhf/pdf.py (39 changes: 35 additions & 4 deletions)
@@ -2,7 +2,7 @@

import copy
import logging
from typing import List
from typing import List, Union

import pyhf.parameters
import pyhf
@@ -506,7 +506,15 @@ def logpdf(self, auxdata, pars):
class _MainModel:
"""Factory class to create pdfs for the main measurement."""

def __init__(self, config, modifiers, nominal_rates, batch_size=None):
def __init__(
self,
config,
modifiers,
nominal_rates,
batch_size=None,
clip_sample_data: Union[float, None] = None,
clip_bin_data: Union[float, None] = None,
):
default_backend = pyhf.default_backend

self.config = config
@@ -515,6 +523,17 @@ def __init__(self, config, modifiers, nominal_rates, batch_size=None):
self._delta_mods = []
self.batch_size = batch_size

self.clip_sample_data = clip_sample_data
self.clip_bin_data = clip_bin_data

if self.clip_sample_data is not None:
log.warning(
f"Clipping expected data per-bin for each sample below {self.clip_sample_data}"
)

if self.clip_bin_data is not None:
log.warning(f"Clipping expected data per-bin below {self.clip_bin_data}")

self._nominal_rates = default_backend.tile(
nominal_rates, (1, 1, self.batch_size or 1, 1)
)
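A minimal sketch of what these warnings look like from the user side, assuming the two-channel workspace `spec` defined in the new test further down; the public keyword arguments themselves are wired up in the `Model.__init__` hunk below.

```python
# Sketch only: enabling clipping emits the warnings added above at construction time.
# `spec` is assumed to be the two-channel workspace from the new test in tests/test_pdf.py.
import logging
import pyhf

logging.basicConfig(level=logging.WARNING)

ws = pyhf.Workspace(spec)
model = ws.model(clip_sample_data=0.0, clip_bin_data=0.0)
# Expected log output (from the pyhf.pdf logger):
#   Clipping expected data per-bin for each sample below 0.0
#   Clipping expected data per-bin below 0.0
```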
@@ -619,13 +638,21 @@ def expected_data(self, pars, return_by_sample=False):
allfac = tensorlib.concatenate(factors + [nom_plus_delta])

newbysample = tensorlib.product(allfac, axis=0)
if self.clip_sample_data is not None:
newbysample = tensorlib.clip(
newbysample, self.clip_sample_data, max_value=None
)

if return_by_sample:
batch_first = tensorlib.einsum('ij...->ji...', newbysample)
if self.batch_size is None:
return batch_first[0]
return batch_first

newresults = tensorlib.sum(newbysample, axis=0)
if self.clip_bin_data is not None:
newresults = tensorlib.clip(newresults, self.clip_bin_data, max_value=None)

if self.batch_size is None:
return newresults[0]
return newresults
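To make the order of operations explicit, here is a NumPy stand-in for the tensorlib calls above with illustrative numbers: the sample-level clip acts on each sample's expected bin contents before the sum over samples, while the bin-level clip acts on the summed result.

```python
# NumPy sketch of the two clipping points in expected_data (values are illustrative).
import numpy as np

newbysample = np.array([[120.0, -5.0], [-30.0, 2.0]])  # (sample, bin)

clipped_by_sample = np.clip(newbysample, 0.0, None)     # clip_sample_data=0.0
# -> [[120., 0.], [0., 2.]]
newresults = clipped_by_sample.sum(axis=0)              # sum over samples
# -> [120., 2.]

# With only clip_bin_data=0.0, the clip is applied after the sum instead:
np.clip(newbysample.sum(axis=0), 0.0, None)             # -> [90., 0.]
```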
@@ -640,6 +667,8 @@ def __init__(
modifier_set=None,
batch_size=None,
validate: bool = True,
clip_sample_data: Union[float, None] = None,
clip_bin_data: Union[float, None] = None,
**config_kwargs,
):
"""
@@ -650,6 +679,8 @@ def __init__(
batch_size (:obj:`None` or :obj:`int`): Number of simultaneous (batched)
Models to compute.
validate (:obj:`bool`): Whether to validate against a JSON schema
clip_sample_data (:obj:`None` or :obj:`float`): Clip the minimum value of expected data by-sample. Default is no clipping.
clip_bin_data (:obj:`None` or :obj:`float`): Clip the minimum value of expected data by-bin. Default is no clipping.
config_kwargs: Possible keyword arguments for the model configuration

Returns:
@@ -684,6 +715,8 @@ def __init__(
modifiers=modifiers,
nominal_rates=_nominal_rates,
batch_size=self.batch_size,
clip_sample_data=clip_sample_data,
clip_bin_data=clip_bin_data,
)

# the below call needs auxdata order for example
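The new keyword arguments are only forwarded here; the user-facing switch is the model constructor. A sketch reusing `ws` and `pars` from the new test below (the quoted yields are the values the test asserts):

```python
# Sketch, reusing `ws` and `pars` from the new test in tests/test_pdf.py.
model_default = ws.model()                      # no clipping
model_clipped = ws.model(clip_sample_data=0.0)  # clip each sample's expected bins at 0.0

# Per the new test, the unclipped model can return negative expected yields:
#   model_default.expected_actualdata(pars)  ~ [183.05, -0.007, -635.60]
#   model_clipped.expected_actualdata(pars)  ~ [183.05,  0.0,      0.0]
```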
@@ -794,8 +827,6 @@ def make_pdf(self, pars):
pdf: A distribution object implementing the main measurement pdf of HistFactory

"""
tensorlib, _ = get_backend()

pdfobjs = []
mainpdf = self.main_model.make_pdf(pars)
if mainpdf:
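The two removed lines were dead code: `tensorlib` was never used in this method. For context, a sketch of what `make_pdf` hands back, assuming any constructed `model` such as the one above:

```python
# Sketch: make_pdf returns the joint HistFactory distribution that backs model.logpdf.
init_pars = pyhf.tensorlib.astensor(model.config.suggested_init())
data = model.expected_data(init_pars)  # actual data concatenated with auxdata
pdf = model.make_pdf(init_pars)
pdf.log_prob(data)  # same log-likelihood value as model.logpdf(init_pars, data)
```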
tests/test_pdf.py (112 changes: 112 additions & 0 deletions)
@@ -1070,3 +1070,115 @@ def make_model(
pyhf.tensorlib.astensor(1.05),
pyhf.tensorlib.astensor([5.0, 5.0]),
)


def test_pdf_clipping(backend):
tensorlib, optimizer = pyhf.get_backend()

spec = {
"channels": [
{
"name": "ch1",
"samples": [
{
"data": [100.0, 100.0],
"modifiers": [
{"data": None, "name": "mu_sig", "type": "normfactor"},
{
"data": {
"hi_data": [125.0, 75.0],
"lo_data": [175.0, 10.0],
},
"name": "np_1",
"type": "histosys",
},
{
"data": {
"hi_data": [125.0, 75.0],
"lo_data": [175.0, 10.0],
},
"name": "np_2",
"type": "histosys",
},
],
"name": "signal",
}
],
},
{
"name": "ch2",
"samples": [
{
"data": [15000.0],
"modifiers": [
{"data": None, "name": "mu_sig", "type": "normfactor"},
{
"data": {"hi_data": [15500.0], "lo_data": [1200.0]},
"name": "np_1",
"type": "histosys",
},
{
"data": {"hi_data": [15500.0], "lo_data": [1200.0]},
"name": "np_2",
"type": "histosys",
},
],
"name": "signal",
}
],
},
],
"measurements": [
{"config": {"parameters": [], "poi": "mu_sig"}, "name": "meas"}
],
"observations": [
{"data": [100], "name": "ch1"},
{"data": [1000], "name": "ch2"},
],
"version": "1.0.0",
}

par_values = []

ws = pyhf.Workspace(spec)
model = ws.model()
data = tensorlib.astensor([100.0, 100.0, 10.0, 0.0, 0.0])

for par_name in model.config.par_names():
if "np" in par_name:
par_values.append(-0.6) # np_1 / np_2
else:
par_values.append(1.0) # mu

pars = tensorlib.astensor(par_values)

# Check with no clipping
assert any(value < 0 for value in model.expected_actualdata(pars))
assert tensorlib.tolist(model.expected_actualdata(pars)) == pytest.approx(
[1.830496e02, -7.040000e-03, -6.355968e02], abs=1e-3
)

# Check with clipping by-sample
model_clip_sample = ws.model(clip_sample_data=0.0)
assert all(value >= 0 for value in model_clip_sample.expected_actualdata(pars))
assert tensorlib.tolist(
model_clip_sample.expected_actualdata(pars)
) == pytest.approx([1.830496e02, 0.0, 0.0], abs=1e-3)

# Check with clipping by-bin
model_clip_bin = ws.model(clip_bin_data=0.0)
assert all(value >= 0 for value in model_clip_bin.expected_actualdata(pars))
assert tensorlib.tolist(model_clip_bin.expected_actualdata(pars)) == pytest.approx(
[1.830496e02, 0.0, 0.0], abs=1e-3
)

    # Minuit cannot handle negative yields; confirm that MLE fails specifically for Minuit
if optimizer.name == 'minuit':
with pytest.raises(pyhf.exceptions.FailedMinimization):
pyhf.infer.mle.fit(data, model)
else:
pyhf.infer.mle.fit(data, model)

# We should be able to converge when clipping is enabled
pyhf.infer.mle.fit(data, model_clip_sample)
pyhf.infer.mle.fit(data, model_clip_bin)
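A usage-level sketch of the behaviour this test pins down, assuming the NumPy backend paired with the Minuit optimizer and reusing `ws`, `data`, and `model` from above:

```python
# Sketch: with Minuit, the unclipped model fails on negative expected yields,
# while a clipped model converges (as asserted in the test above).
pyhf.set_backend("numpy", "minuit")

try:
    pyhf.infer.mle.fit(data, model)
except pyhf.exceptions.FailedMinimization:
    bestfit = pyhf.infer.mle.fit(data, ws.model(clip_bin_data=0.0))
```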