From ab2817c069866f14aeb334e432cb4c6b49271112 Mon Sep 17 00:00:00 2001 From: Espoir Murhabazi Date: Sat, 9 Nov 2024 15:28:56 +0000 Subject: [PATCH 01/10] fix the string datatype --- .gitignore | 1 + pandas/core/arrays/string_.py | 7 +++++-- pandas/tests/api/test_api.py | 4 ++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index a188e216d9f70..14d7cfe539960 100644 --- a/.gitignore +++ b/.gitignore @@ -137,3 +137,4 @@ doc/source/savefig/ # Interactive terminal generated files # ######################################## .jupyterlite.doit.db +.venv diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index de129df2575d3..a75b8bc9c5a4c 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -64,7 +64,7 @@ from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna - +from pandas.util._decorators import set_module from pandas.io.formats import printing if TYPE_CHECKING: @@ -86,6 +86,7 @@ from pandas import Series +@set_module("pandas") @register_extension_dtype class StringDtype(StorageExtensionDtype): """ @@ -373,7 +374,7 @@ def __from_arrow__( NDArrayBacked.__init__(new_string_array, arr, self) return new_string_array - +@set_module("pandas") class BaseStringArray(ExtensionArray): """ Mixin class for StringArray, ArrowStringArray. @@ -532,6 +533,7 @@ def _str_map_nan_semantics( # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" + class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] """ Extension array for string data. @@ -959,6 +961,7 @@ def _cmp_method(self, other, op): _arith_method = _cmp_method + class StringArrayNumpySemantics(StringArray): _storage = "python" _na_value = np.nan diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 842fa1a151267..68650e24f8d19 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -416,3 +416,7 @@ def test_set_module(): assert pd.Period.__module__ == "pandas" assert pd.Timestamp.__module__ == "pandas" assert pd.Timedelta.__module__ == "pandas" + assert pd.StringDtype.__module__ == "pandas" + assert pd.BaseStringArray.__module__ == "pandas" + assert pd.StringArrayNumpySemantics.___module__ == "pandas" + assert pd.StringArray.__module__ == "pandas" From 7e4e60ad49a939ad7303ad21b28c28b01e52e08a Mon Sep 17 00:00:00 2001 From: Espoir Murhabazi Date: Sat, 9 Nov 2024 15:41:07 +0000 Subject: [PATCH 02/10] remove .env --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 14d7cfe539960..a188e216d9f70 100644 --- a/.gitignore +++ b/.gitignore @@ -137,4 +137,3 @@ doc/source/savefig/ # Interactive terminal generated files # ######################################## .jupyterlite.doit.db -.venv From b1986fce71b519cfd7449a288e8024c45aa8e54f Mon Sep 17 00:00:00 2001 From: Espoir Murhabazi Date: Sat, 9 Nov 2024 15:47:38 +0000 Subject: [PATCH 03/10] fix the import error --- pandas/core/arrays/string_.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index a75b8bc9c5a4c..b92504fe507cd 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -28,7 +28,10 @@ pa_version_under10p1, ) from pandas.compat.numpy import function as nv -from pandas.util._decorators import doc +from pandas.util._decorators import ( + doc, + set_module, +) from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.base import ( @@ -64,7 +67,7 @@ from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer from pandas.core.missing import isna -from pandas.util._decorators import set_module + from pandas.io.formats import printing if TYPE_CHECKING: @@ -374,6 +377,7 @@ def __from_arrow__( NDArrayBacked.__init__(new_string_array, arr, self) return new_string_array + @set_module("pandas") class BaseStringArray(ExtensionArray): """ @@ -534,6 +538,7 @@ def _str_map_nan_semantics( # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" + class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] """ Extension array for string data. @@ -723,7 +728,8 @@ def __arrow_array__(self, type=None): values[self.isna()] = None return pa.array(values, type=type, from_pandas=True) - def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: # type: ignore[override] + # type: ignore[override] + def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: arr = self._ndarray return arr, self.dtype.na_value @@ -961,7 +967,6 @@ def _cmp_method(self, other, op): _arith_method = _cmp_method - class StringArrayNumpySemantics(StringArray): _storage = "python" _na_value = np.nan From 374fed8ef0680cb1d54b22e51cdfea33235c80d9 Mon Sep 17 00:00:00 2001 From: Espoir Murhabazi Date: Sat, 9 Nov 2024 16:02:21 +0000 Subject: [PATCH 04/10] fix failed test --- pandas/tests/api/test_api.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 68650e24f8d19..9ea37da7cf46c 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -417,6 +417,3 @@ def test_set_module(): assert pd.Timestamp.__module__ == "pandas" assert pd.Timedelta.__module__ == "pandas" assert pd.StringDtype.__module__ == "pandas" - assert pd.BaseStringArray.__module__ == "pandas" - assert pd.StringArrayNumpySemantics.___module__ == "pandas" - assert pd.StringArray.__module__ == "pandas" From be695476380d47527e369937f372beccf5a05579 Mon Sep 17 00:00:00 2001 From: Espoir Murhabazi Date: Sat, 9 Nov 2024 16:41:52 +0000 Subject: [PATCH 05/10] Update string_.py --- pandas/core/arrays/string_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index b92504fe507cd..70375c172a106 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -728,8 +728,8 @@ def __arrow_array__(self, type=None): values[self.isna()] = None return pa.array(values, type=type, from_pandas=True) - # type: ignore[override] - def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: + + def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: # type: ignore[override] arr = self._ndarray return arr, self.dtype.na_value From 9e125e3b8f3d603c9de764d9fb18e1402d5f7c5e Mon Sep 17 00:00:00 2001 From: Espoir Murhabazi Date: Sat, 9 Nov 2024 16:51:27 +0000 Subject: [PATCH 06/10] apply ruff --- pandas/core/arrays/string_.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 70375c172a106..e95b23b061e3b 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -728,7 +728,6 @@ def __arrow_array__(self, type=None): values[self.isna()] = None return pa.array(values, type=type, from_pandas=True) - def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: # type: ignore[override] arr = self._ndarray From 44155ac368a28c5ab92ed4c056aafa2051112a58 Mon Sep 17 00:00:00 2001 From: Espoir Murhabazi Date: Sat, 9 Nov 2024 18:23:28 +0000 Subject: [PATCH 07/10] remove the module on basestring --- pandas/core/arrays/string_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index e95b23b061e3b..4812aeaa17dcc 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -378,7 +378,6 @@ def __from_arrow__( return new_string_array -@set_module("pandas") class BaseStringArray(ExtensionArray): """ Mixin class for StringArray, ArrowStringArray. @@ -728,7 +727,8 @@ def __arrow_array__(self, type=None): values[self.isna()] = None return pa.array(values, type=type, from_pandas=True) - def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: # type: ignore[override] + # type: ignore[override] + def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: arr = self._ndarray return arr, self.dtype.na_value From 1f2fb548a68c871afcce34b3ff5edb644bb1d816 Mon Sep 17 00:00:00 2001 From: Espoir Murhabazi Date: Sat, 9 Nov 2024 18:26:57 +0000 Subject: [PATCH 08/10] Update string_.py --- pandas/core/arrays/string_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 4812aeaa17dcc..a01143d2b4590 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -727,8 +727,8 @@ def __arrow_array__(self, type=None): values[self.isna()] = None return pa.array(values, type=type, from_pandas=True) - # type: ignore[override] - def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: + + def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: # type: ignore[override] arr = self._ndarray return arr, self.dtype.na_value From 9dba8f26b34d304d8ce212166466968503dfc678 Mon Sep 17 00:00:00 2001 From: Espoir Murhabazi Date: Sat, 9 Nov 2024 18:27:32 +0000 Subject: [PATCH 09/10] fix stuff --- pandas/core/arrays/string_.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index a01143d2b4590..6ded73210bb0d 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -727,7 +727,6 @@ def __arrow_array__(self, type=None): values[self.isna()] = None return pa.array(values, type=type, from_pandas=True) - def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]: # type: ignore[override] arr = self._ndarray From 996db1fb876b3cc2d3d16db30a3aef986de7e859 Mon Sep 17 00:00:00 2001 From: Espoir Murhabazi Date: Sat, 9 Nov 2024 18:29:19 +0000 Subject: [PATCH 10/10] Update string_.py --- pandas/core/arrays/string_.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 6ded73210bb0d..9b1f986a7158e 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -536,8 +536,6 @@ def _str_map_nan_semantics( # error: Definition of "_concat_same_type" in base class "NDArrayBacked" is # incompatible with definition in base class "ExtensionArray" - - class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] """ Extension array for string data.