Skip to content

The indexes of DataFrame.describe(percentiles=[0.29, 0.57, 0.58]) are incorrect #48298

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Sep 6, 2022
6 changes: 3 additions & 3 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1721,10 +1721,10 @@ def format_percentiles(

percentiles = 100 * percentiles

int_idx = np.isclose(percentiles.astype(int), percentiles)
int_idx = np.isclose(percentiles.round().astype(int), percentiles)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you make `percentiles.round().astype(int)` a dedicated variable so it can be used here and below?


if np.all(int_idx):
out = percentiles.astype(int).astype(str)
out = percentiles.round().astype(int).astype(str)
return [i + "%" for i in out]

unique_pcts = np.unique(percentiles)
Expand All @@ -1737,7 +1737,7 @@ def format_percentiles(
).astype(int)
prec = max(1, prec)
out = np.empty_like(percentiles, dtype=object)
out[int_idx] = percentiles[int_idx].astype(int).astype(str)
out[int_idx] = percentiles[int_idx].round().astype(int).astype(str)

out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
return [i + "%" for i in out]
Expand Down
35 changes: 22 additions & 13 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3306,24 +3306,33 @@ def test_nat_representations(self):
assert f(NaT) == "NaT"


def test_format_percentiles():
result = fmt.format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
expected = ["1.999%", "2.001%", "50%", "66.667%", "99.99%"]
@pytest.mark.parametrize(
"percentiles, expected",
[
(
[0.01999, 0.02001, 0.5, 0.666666, 0.9999],
["1.999%", "2.001%", "50%", "66.667%", "99.99%"],
),
(
[0, 0.5, 0.02001, 0.5, 0.666666, 0.9999],
["0%", "50%", "2.0%", "50%", "66.67%", "99.99%"],
),
([0.281, 0.29, 0.57, 0.58], ["28.1%", "29%", "57%", "58%"]),
],
)
def test_format_percentiles(percentiles, expected):
result = fmt.format_percentiles(percentiles)
assert result == expected

result = fmt.format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
expected = ["0%", "50%", "2.0%", "50%", "66.67%", "99.99%"]
assert result == expected

@pytest.mark.parametrize(
"percentiles",
[([0.1, np.nan, 0.5]), ([-0.001, 0.1, 0.5]), ([2, 0.1, 0.5]), ([0.1, 0.5, "a"])],
)
def test_error_format_percentiles(percentiles):
msg = r"percentiles should all be in the interval \[0,1\]"
with pytest.raises(ValueError, match=msg):
fmt.format_percentiles([0.1, np.nan, 0.5])
with pytest.raises(ValueError, match=msg):
fmt.format_percentiles([-0.001, 0.1, 0.5])
with pytest.raises(ValueError, match=msg):
fmt.format_percentiles([2, 0.1, 0.5])
with pytest.raises(ValueError, match=msg):
fmt.format_percentiles([0.1, 0.5, "a"])
fmt.format_percentiles(percentiles)


def test_format_percentiles_integer_idx():
Expand Down