diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fc53e044a6544..5da327a82c02b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5326,7 +5326,9 @@ def _is_memory_usage_qualified(self) -> bool: """ Return a boolean if we need a qualified .info display. """ - return is_object_dtype(self.dtype) + return is_object_dtype(self.dtype) or ( + is_string_dtype(self.dtype) and self.dtype.storage == "python" # type: ignore[union-attr] + ) def __contains__(self, key: Any) -> bool: """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9e002ccd3a787..7cb28214c7289 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -65,6 +65,7 @@ is_list_like, is_object_dtype, is_scalar, + is_string_dtype, pandas_dtype, ) from pandas.core.dtypes.dtypes import ( @@ -1344,10 +1345,12 @@ def dtype(self) -> np.dtype: def _is_memory_usage_qualified(self) -> bool: """return a boolean if we need a qualified .info display""" - def f(level) -> bool: - return "mixed" in level or "string" in level or "unicode" in level + def f(dtype) -> bool: + return is_object_dtype(dtype) or ( + is_string_dtype(dtype) and dtype.storage == "python" + ) - return any(f(level) for level in self._inferred_type_levels) + return any(f(level.dtype) for level in self.levels) # Cannot determine type of "memory_usage" @doc(Index.memory_usage) # type: ignore[has-type] diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py index 475632667a87a..f0ae00fa6febb 100644 --- a/pandas/tests/frame/methods/test_info.py +++ b/pandas/tests/frame/methods/test_info.py @@ -10,6 +10,7 @@ from pandas._config import using_string_dtype from pandas.compat import ( + HAS_PYARROW, IS64, PYPY, ) @@ -435,18 +436,25 @@ def test_usage_via_getsizeof(): assert abs(diff) < 100 -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_info_memory_usage_qualified(): +def test_info_memory_usage_qualified(using_infer_string): buf = StringIO() df = DataFrame(1, columns=list("ab"), index=[1, 2, 3]) df.info(buf=buf) assert "+" not in buf.getvalue() buf = StringIO() - df = DataFrame(1, columns=list("ab"), index=list("ABC")) + df = DataFrame(1, columns=list("ab"), index=Index(list("ABC"), dtype=object)) df.info(buf=buf) assert "+" in buf.getvalue() + buf = StringIO() + df = DataFrame(1, columns=list("ab"), index=Index(list("ABC"), dtype="str")) + df.info(buf=buf) + if using_infer_string and HAS_PYARROW: + assert "+" not in buf.getvalue() + else: + assert "+" in buf.getvalue() + buf = StringIO() df = DataFrame( 1, columns=list("ab"), index=MultiIndex.from_product([range(3), range(3)]) @@ -459,7 +467,10 @@ def test_info_memory_usage_qualified(): 1, columns=list("ab"), index=MultiIndex.from_product([range(3), ["foo", "bar"]]) ) df.info(buf=buf) - assert "+" in buf.getvalue() + if using_infer_string and HAS_PYARROW: + assert "+" not in buf.getvalue() + else: + assert "+" in buf.getvalue() def test_info_memory_usage_bug_on_multiindex(): @@ -496,16 +507,15 @@ def test_info_categorical(): df.info(buf=buf) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") @pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system") -def test_info_int_columns(): +def test_info_int_columns(using_infer_string): # GH#37245 df = DataFrame({1: [1, 2], 2: [2, 3]}, index=["A", "B"]) buf = StringIO() df.info(show_counts=True, buf=buf) result = buf.getvalue() expected = textwrap.dedent( - """\ + f"""\ Index: 2 entries, A to B Data columns (total 2 columns): @@ -514,19 +524,23 @@ def test_info_int_columns(): 0 1 2 non-null int64 1 2 2 non-null int64 dtypes: int64(2) - memory usage: 48.0+ bytes + memory usage: {'50.0' if using_infer_string and HAS_PYARROW else '48.0+'} bytes """ ) assert result == expected @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") -def test_memory_usage_empty_no_warning(): +def test_memory_usage_empty_no_warning(using_infer_string): # GH#50066 df = DataFrame(index=["a", "b"]) with tm.assert_produces_warning(None): result = df.memory_usage() - expected = Series(16 if IS64 else 8, index=["Index"]) + if using_infer_string and HAS_PYARROW: + value = 18 + else: + value = 16 if IS64 else 8 + expected = Series(value, index=["Index"]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/methods/test_info.py b/pandas/tests/series/methods/test_info.py index 8fac40fe5fb25..7defad8a463f3 100644 --- a/pandas/tests/series/methods/test_info.py +++ b/pandas/tests/series/methods/test_info.py @@ -7,10 +7,14 @@ from pandas._config import using_string_dtype -from pandas.compat import PYPY +from pandas.compat import ( + HAS_PYARROW, + PYPY, +) from pandas import ( CategoricalIndex, + Index, MultiIndex, Series, date_range, @@ -41,7 +45,9 @@ def test_info_categorical(): @pytest.mark.parametrize("verbose", [True, False]) -def test_info_series(lexsorted_two_level_string_multiindex, verbose): +def test_info_series( + lexsorted_two_level_string_multiindex, verbose, using_infer_string +): index = lexsorted_two_level_string_multiindex ser = Series(range(len(index)), index=index, name="sth") buf = StringIO() @@ -63,10 +69,11 @@ def test_info_series(lexsorted_two_level_string_multiindex, verbose): 10 non-null int64 """ ) + qualifier = "" if using_infer_string and HAS_PYARROW else "+" expected += textwrap.dedent( f"""\ dtypes: int64(1) - memory usage: {ser.memory_usage()}.0+ bytes + memory usage: {ser.memory_usage()}.0{qualifier} bytes """ ) assert result == expected @@ -142,20 +149,21 @@ def test_info_memory_usage_deep_pypy(): assert s_object.memory_usage(deep=True) == s_object.memory_usage() -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( - "series, plus", + "index, plus", [ - (Series(1, index=[1, 2, 3]), False), - (Series(1, index=list("ABC")), True), - (Series(1, index=MultiIndex.from_product([range(3), range(3)])), False), + ([1, 2, 3], False), + (Index(list("ABC"), dtype="str"), not (using_string_dtype() and HAS_PYARROW)), + (Index(list("ABC"), dtype=object), True), + (MultiIndex.from_product([range(3), range(3)]), False), ( - Series(1, index=MultiIndex.from_product([range(3), ["foo", "bar"]])), - True, + MultiIndex.from_product([range(3), ["foo", "bar"]]), + not (using_string_dtype() and HAS_PYARROW), ), ], ) -def test_info_memory_usage_qualified(series, plus): +def test_info_memory_usage_qualified(index, plus): + series = Series(1, index=index) buf = StringIO() series.info(buf=buf) if plus: