From 20ec4e476ebccad2c7d209c61d585bc39f395a81 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 29 May 2019 17:18:09 +0100 Subject: [PATCH 1/7] Provide Extension.Dtype.construct_from_string by default --- pandas/core/dtypes/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index da8908ec39095..c55026a4f5c80 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -202,7 +202,10 @@ def construct_from_string(cls, string): ... raise TypeError("Cannot construct a '{}' from " ... "'{}'".format(cls, string)) """ - raise AbstractMethodError(cls) + if string != cls.name: + raise TypeError("Cannot construct a '{}' from '{}'".format( + cls.__name__, string)) + return cls() @classmethod def is_dtype(cls, dtype): From 661d5225583514281bb4ff2692a3e17bd1378636 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 29 May 2019 20:42:58 +0100 Subject: [PATCH 2/7] Improving construct_from_string docstring, and remove unnecessary overwrite of the method --- pandas/core/arrays/integer.py | 11 ----------- pandas/core/dtypes/base.py | 29 +++++++++++++++++++++-------- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 42aa6a055acca..29337b7f76131 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -78,17 +78,6 @@ def construct_array_type(cls): """ return IntegerArray - @classmethod - def construct_from_string(cls, string): - """ - Construction from a string, raise a TypeError if not - possible - """ - if string == cls.name: - return cls() - raise TypeError("Cannot construct a '{}' from " - "'{}'".format(cls, string)) - def integer_array(values, dtype=None, copy=False): """ diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index c55026a4f5c80..c1b2c35e6be9a 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -174,15 +174,26 @@ def construct_array_type(cls): @classmethod def construct_from_string(cls, string): """ - Attempt to construct this type from a string. + Construct this type from a string. + + This is useful mainly for data types that accept parameters. + For example, datetime types can accept units (e.g. nanoseconds) + or timezones. In those cases, the expected type could be + something like ``datetime64[ns, UTC]``. + + By default, in the abstract class, just the name of the type is + expected. But subclasses can overwrite this method to accept + type parameters. Parameters ---------- string : str + The name of the type, for example ``category``. Returns ------- - self : instance of 'cls' + ExtensionDtype + Instance of the dtype. Raises ------ @@ -191,16 +202,18 @@ def construct_from_string(cls, string): Examples -------- - If the extension dtype can be constructed without any arguments, - the following may be an adequate implementation. + For extension dtypes with arguments the following may be an + adequate implementation. >>> @classmethod - ... def construct_from_string(cls, string) - ... if string == cls.name: - ... return cls() + ... def construct_from_string(cls, string): + ... pattern = re.compile(r"^my_type\[(?P.+)\]$") + ... match = pattern.match(string) + ... if match: + ... return cls(**match.groupdict()) ... else: ... raise TypeError("Cannot construct a '{}' from " - ... "'{}'".format(cls, string)) + ... "'{}'".format(cls.__name__, string)) """ if string != cls.name: raise TypeError("Cannot construct a '{}' from '{}'".format( From 8ff1965698175caa09bc1776a099eb35eec4618e Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 29 May 2019 20:47:08 +0100 Subject: [PATCH 3/7] Ignoring flake8 false positive --- pandas/core/dtypes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index c1b2c35e6be9a..296cfdc60a595 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -207,7 +207,7 @@ def construct_from_string(cls, string): >>> @classmethod ... def construct_from_string(cls, string): - ... pattern = re.compile(r"^my_type\[(?P.+)\]$") + ... pattern = re.compile(r"^my_type\[(?P.+)\]$") # noqa ... match = pattern.match(string) ... if match: ... return cls(**match.groupdict()) From a50301a8017a10c155bfec7eb1fb6fdd96ba4723 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 30 May 2019 11:19:07 +0100 Subject: [PATCH 4/7] Addressing CI and PR comments: implementing tests, fixing docstring error and removing CategoricalDtype.construct_from_string --- pandas/core/dtypes/base.py | 11 +++++----- pandas/core/dtypes/dtypes.py | 13 ------------ pandas/tests/dtypes/test_dtypes.py | 32 +++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 296cfdc60a595..aa7c68acc186f 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -173,17 +173,16 @@ def construct_array_type(cls): @classmethod def construct_from_string(cls, string): - """ + r""" Construct this type from a string. This is useful mainly for data types that accept parameters. - For example, datetime types can accept units (e.g. nanoseconds) - or timezones. In those cases, the expected type could be - something like ``datetime64[ns, UTC]``. + For example, a period dtype accepts a frequency parameter that + can be set as ``period[H]`` (where H means hourly frequency). By default, in the abstract class, just the name of the type is expected. But subclasses can overwrite this method to accept - type parameters. + parameters. Parameters ---------- @@ -207,7 +206,7 @@ def construct_from_string(cls, string): >>> @classmethod ... def construct_from_string(cls, string): - ... pattern = re.compile(r"^my_type\[(?P.+)\]$") # noqa + ... pattern = re.compile(r"^my_type\[(?P.+)\]$") ... match = pattern.match(string) ... if match: ... return cls(**match.groupdict()) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 32047c3fbb5e1..a56ee72cf1910 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -440,19 +440,6 @@ def construct_array_type(cls): from pandas import Categorical return Categorical - @classmethod - def construct_from_string(cls, string): - """ - attempt to construct this type from a string, raise a TypeError if - it's not possible """ - try: - if string == 'category': - return cls() - else: - raise TypeError("cannot construct a CategoricalDtype") - except AttributeError: - pass - @staticmethod def validate_ordered(ordered): """ diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 79ebfcc30a7e4..aab1c46549f37 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -51,6 +51,36 @@ def test_pickle(self): assert result == self.dtype +class TestBaseDtype: + def setup_method(self): + class DummyDtype(pd.core.dtypes.base.ExtensionDtype): + name = 'dummy' + self.dtype = DummyDtype() + + def test_str(self): + assert str(self.dtype) == self.dtype.name + + def test_eq(self): + assert self.dtype == 'dummy' + assert self.dtype != 'anonther_type' + + def test_default_kind(self): + assert self.dtype.kind == 'O' + + def test_construct_from_string(self): + dtype_instance = self.dtype.__class__.construct_from_string('dummy') + assert isinstance(dtype_instance, self.dtype.__class__) + with pytest.raises(TypeError, match="Cannot construct a 'DummyDtype' " + "from 'another_type'"): + self.dtype.__class__.construct_from_string('another_type') + + def test_default_is_numeric(self): + assert not self.dtype._is_numeric + + def test_default_is_boolean(self): + assert not self.dtype._is_numeric + + class TestCategoricalDtype(Base): def create(self): @@ -82,7 +112,7 @@ def test_equality(self): def test_construction_from_string(self): result = CategoricalDtype.construct_from_string('category') assert is_dtype_equal(self.dtype, result) - msg = "cannot construct a CategoricalDtype" + msg = "Cannot construct a 'CategoricalDtype' from 'foo'" with pytest.raises(TypeError, match=msg): CategoricalDtype.construct_from_string('foo') From e04ed7904688d9e3f477688af862cb192e2229b0 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Thu, 30 May 2019 11:31:35 +0100 Subject: [PATCH 5/7] Adding type annotations --- pandas/core/dtypes/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index aa7c68acc186f..0a0ba69659570 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -172,7 +172,7 @@ def construct_array_type(cls): raise NotImplementedError @classmethod - def construct_from_string(cls, string): + def construct_from_string(cls, string: str): r""" Construct this type from a string. @@ -220,7 +220,7 @@ def construct_from_string(cls, string): return cls() @classmethod - def is_dtype(cls, dtype): + def is_dtype(cls, dtype) -> bool: """Check if we match 'dtype'. Parameters From 7fa61d2ad5f36f7342fa58c471c71cdba3c08b2d Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 5 Jun 2019 15:39:36 +0100 Subject: [PATCH 6/7] Moving tests to pandas/tests/extension/ --- pandas/core/dtypes/base.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 30 ---------------------------- pandas/tests/extension/base/dtype.py | 14 +++++++++++++ 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 0a0ba69659570..9fafcc61b82db 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -216,7 +216,7 @@ def construct_from_string(cls, string: str): """ if string != cls.name: raise TypeError("Cannot construct a '{}' from '{}'".format( - cls.__name__, string)) + cls, string)) return cls() @classmethod diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index aab1c46549f37..cf368f9980d72 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -51,36 +51,6 @@ def test_pickle(self): assert result == self.dtype -class TestBaseDtype: - def setup_method(self): - class DummyDtype(pd.core.dtypes.base.ExtensionDtype): - name = 'dummy' - self.dtype = DummyDtype() - - def test_str(self): - assert str(self.dtype) == self.dtype.name - - def test_eq(self): - assert self.dtype == 'dummy' - assert self.dtype != 'anonther_type' - - def test_default_kind(self): - assert self.dtype.kind == 'O' - - def test_construct_from_string(self): - dtype_instance = self.dtype.__class__.construct_from_string('dummy') - assert isinstance(dtype_instance, self.dtype.__class__) - with pytest.raises(TypeError, match="Cannot construct a 'DummyDtype' " - "from 'another_type'"): - self.dtype.__class__.construct_from_string('another_type') - - def test_default_is_numeric(self): - assert not self.dtype._is_numeric - - def test_default_is_boolean(self): - assert not self.dtype._is_numeric - - class TestCategoricalDtype(Base): def create(self): diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index e9d1f183812cc..7b9dedceb00d4 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -1,6 +1,7 @@ import warnings import numpy as np +import pytest import pandas as pd @@ -89,3 +90,16 @@ def test_check_dtype(self, data): def test_hashable(self, dtype): hash(dtype) # no error + + def test_str(self, dtype): + assert str(dtype) == dtype.name + + def test_eq(self, dtype): + assert dtype == dtype.name + assert dtype != 'anonther_type' + + def test_construct_from_string(self, dtype): + dtype_instance = dtype.__class__.construct_from_string(dtype.name) + assert isinstance(dtype_instance, dtype.__class__) + with pytest.raises(TypeError): + dtype.__class__.construct_from_string('another_type') From 2897ad2f98795a0a81eab112ae7fba9e98d87660 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Wed, 5 Jun 2019 16:36:54 +0100 Subject: [PATCH 7/7] Fixing test (error message) --- pandas/core/dtypes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 9fafcc61b82db..0a0ba69659570 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -216,7 +216,7 @@ def construct_from_string(cls, string: str): """ if string != cls.name: raise TypeError("Cannot construct a '{}' from '{}'".format( - cls, string)) + cls.__name__, string)) return cls() @classmethod