Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions pymc/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -1191,6 +1191,33 @@ def logp(value, p):
msg="0 <= p <=1, sum(p) = 1",
)

def icdf(value, p):
    """Inverse CDF (quantile function) of the Categorical distribution.

    For each probability level ``q`` in ``value`` this returns the smallest
    category index ``k`` such that ``cumsum(p)[k] >= q``.

    Parameters
    ----------
    value
        Probability level(s) at which to evaluate the quantile function.
    p
        Category probabilities; the last axis indexes the categories.

    Returns
    -------
    The category index tensor, passed through ``check_icdf_value`` (with
    ``value``) and ``check_icdf_parameters`` (with the constraints
    ``0 <= p <= 1`` and ``sum(p) ~= 1``).
    """
    q = pt.as_tensor_variable(value)
    cdf = pt.cumsum(p, axis=-1)

    # Align `q` with the batch dimensions of `cdf` (every axis but the last).
    cdf_batch_ndim = cdf.ndim - 1
    q_ndim = q.ndim
    if q_ndim < cdf_batch_ndim:
        q = pt.shape_padleft(q, cdf_batch_ndim - q_ndim)
    elif q_ndim > cdf_batch_ndim:
        # NOTE(review): a reviewer asked for expand_dims instead of a
        # hand-built dimshuffle pattern. The new broadcastable axes go between
        # the batch axes and the category axis, exactly where the dimshuffle
        # pattern placed its "x" entries.
        cdf = pt.expand_dims(cdf, axis=tuple(range(cdf_batch_ndim, q_ndim)))

    # NOTE(review): a reviewer asked why a clipped "q_safe" was needed. It
    # guarded `argmax(q <= cdf)`, which returns 0 when no entry satisfies the
    # comparison (e.g. q == 1 while cdf[-1] is slightly below 1). Counting the
    # cumulative probabilities strictly below `q`, with the final one left
    # out, yields the same index for every q that some cdf entry reaches and
    # is capped at the last category otherwise, so no clipping is required.
    # NOTE(review): a reviewer also asked "Can we use search_sorted?". Not
    # adopted here; presumably it needs a 1-D sorted array while `cdf` may be
    # batched -- TODO confirm.
    below = pt.shape_padright(q, 1) > cdf[..., :-1]
    idx = pt.sum(below.astype("int64"), axis=-1).astype("int64")

    idx = check_icdf_value(idx, value)
    return check_icdf_parameters(
        idx,
        0 <= p,
        p <= 1,
        pt.isclose(pt.sum(p, axis=-1), 1),
        msg="0 <= p <=1, sum(p) = 1",
    )

def logcdf(value, p):
k = pt.shape(p)[-1]
value, safe_value_p = Categorical._safe_index_value_p(value, p.cumsum(-1))
Expand Down
27 changes: 27 additions & 0 deletions tests/distributions/test_discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,33 @@ class TestCategorical(BaseTestDistributionRandom):
"check_rv_size",
]

@pytest.mark.parametrize("n", [2, 3, 4])
def test_categorical_icdf(self, n):
    """Compare the Categorical icdf with a NumPy reference for ``n`` categories."""

    def numpy_categorical_ppf(q, p):
        # Reference quantile: argmax over the boolean mask picks the first
        # category whose cumulative probability is >= q.
        levels = np.asarray(q)[..., None]
        cumulative = np.cumsum(p, axis=-1)
        return np.argmax(levels <= cumulative, axis=-1)

    check_icdf(pm.Categorical, {"p": Simplex(n)}, numpy_categorical_ppf)

def test_categorical_icdf_batch_shapes(self):
    """Check icdf results when ``p`` is batched and ``q`` is 1-D or 2-D."""
    probs = np.array([[0.2, 0.3, 0.5], [0.1, 0.1, 0.8]])
    dist = pm.Categorical.dist(p=probs)

    # Two quantiles evaluated against the two rows of `probs`.
    vec_levels = np.array([0.0, 0.25])
    vec_result = icdf(dist, vec_levels).eval()
    np.testing.assert_array_equal(vec_result, np.array([0, 2]))

    # A 2x3 grid of quantiles evaluated against the same two rows.
    mat_levels = np.array([[0.05, 0.6, 0.99], [0.21, 0.19, 0.81]])
    mat_result = icdf(dist, mat_levels).eval()
    np.testing.assert_array_equal(mat_result, np.array([[0, 2, 2], [2, 1, 2]]))

def test_categorical_icdf_upper_edge(self):
    """The quantile at ``q = 1.0`` must be the last category index."""
    dist = pm.Categorical.dist(p=np.array([0.1, 0.2, 0.7]))
    result = icdf(dist, np.array([1.0])).eval()
    assert result[0] == 2


class TestLogitCategorical(BaseTestDistributionRandom):
pymc_dist = pm.Categorical
Expand Down