Skip to content

clib.conversion._to_numpy: Add tests for pandas.Series with datetime dtypes #3670

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jan 9, 2025
Merged
11 changes: 11 additions & 0 deletions pygmt/clib/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,17 @@
numpy_dtype = np.float64
data = data.to_numpy(na_value=np.nan)

# Deal with timezone-aware datetime dtypes.
if isinstance(dtype, pd.DatetimeTZDtype): # pandas.DatetimeTZDtype
numpy_dtype = getattr(dtype, "base", None)
elif isinstance(dtype, pd.ArrowDtype) and hasattr(dtype.pyarrow_dtype, "tz"):
# pd.ArrowDtype[pa.Timestamp]
numpy_dtype = getattr(dtype, "numpy_dtype", None)
# TODO(pandas>=2.1): Remove the workaround for pandas<2.1.
if Version(pd.__version__) < Version("2.1"):
# In pandas 2.0, dtype.numpy_dtype is dtype("O").
numpy_dtype = np.dtype(f"M8[{dtype.pyarrow_dtype.unit}]") # type: ignore[assignment, attr-defined]

Check warning on line 204 in pygmt/clib/conversion.py

View check run for this annotation

Codecov / codecov/patch

pygmt/clib/conversion.py#L204

Added line #L204 was not covered by tests
Comment on lines +202 to +204
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a TODO here to remove this once we drop support for pandas 2.0? Should be after 2025-08-29 according to https://scientific-python.org/specs/spec-0000/#support-window


array = np.ascontiguousarray(data, dtype=numpy_dtype)

# Check if a np.object_ array can be converted to np.str_.
Expand Down
107 changes: 107 additions & 0 deletions pygmt/tests/test_clib_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,113 @@ def test_to_numpy_pandas_date(dtype, expected_dtype):
)


# Reusable xfail marker for parametrized cases when the installed pandas is older
# than 2.1; the reason string links the upstream pandas issue.
pandas_old_version = pytest.mark.xfail(
    condition=Version(pd.__version__) < Version("2.1"),
    reason="pandas 2.0 bug reported in https://github.com/pandas-dev/pandas/issues/52705",
)


@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        # NumPy datetime64 types. Only unit 's'/'ms'/'us'/'ns' are supported.
        pytest.param("datetime64[s]", "datetime64[s]", id="datetime64[s]"),
        pytest.param("datetime64[ms]", "datetime64[ms]", id="datetime64[ms]"),
        pytest.param("datetime64[us]", "datetime64[us]", id="datetime64[us]"),
        pytest.param("datetime64[ns]", "datetime64[ns]", id="datetime64[ns]"),
        # pandas.DatetimeTZDtype can be given in two ways [tz is required]:
        # 1. pandas.DatetimeTZDtype(unit, tz)
        # 2. String aliases: "datetime64[unit, tz]"
        pytest.param(
            "datetime64[s, UTC]",
            "datetime64[s]",
            id="datetime64[s, tz=UTC]",
            marks=pandas_old_version,
        ),
        pytest.param(
            "datetime64[s, America/New_York]",
            "datetime64[s]",
            id="datetime64[s, tz=America/New_York]",
            marks=pandas_old_version,
        ),
        pytest.param(
            "datetime64[s, +07:30]",
            "datetime64[s]",
            id="datetime64[s, +07:30]",
            marks=pandas_old_version,
        ),
        # PyArrow timestamp types can be given in two ways [tz is optional]:
        # 1. pd.ArrowDtype(pyarrow.Timestamp(unit, tz=tz))
        # 2. String aliases: "timestamp[unit, tz][pyarrow]"
        pytest.param(
            "timestamp[s][pyarrow]",
            "datetime64[s]",
            id="timestamp[s][pyarrow]",
            marks=skip_if_no(package="pyarrow"),
        ),
        pytest.param(
            "timestamp[ms][pyarrow]",
            "datetime64[ms]",
            id="timestamp[ms][pyarrow]",
            marks=[skip_if_no(package="pyarrow"), pandas_old_version],
        ),
        pytest.param(
            "timestamp[us][pyarrow]",
            "datetime64[us]",
            id="timestamp[us][pyarrow]",
            marks=[skip_if_no(package="pyarrow"), pandas_old_version],
        ),
        pytest.param(
            "timestamp[ns][pyarrow]",
            "datetime64[ns]",
            id="timestamp[ns][pyarrow]",
            marks=skip_if_no(package="pyarrow"),
        ),
        pytest.param(
            "timestamp[s, UTC][pyarrow]",
            "datetime64[s]",
            id="timestamp[s, UTC][pyarrow]",
            marks=skip_if_no(package="pyarrow"),
        ),
        pytest.param(
            "timestamp[s, America/New_York][pyarrow]",
            "datetime64[s]",
            id="timestamp[s, America/New_York][pyarrow]",
            marks=skip_if_no(package="pyarrow"),
        ),
        pytest.param(
            "timestamp[s, +08:00][pyarrow]",
            "datetime64[s]",
            id="timestamp[s, +08:00][pyarrow]",
            marks=skip_if_no(package="pyarrow"),
        ),
    ],
)
def test_to_numpy_pandas_datetime(dtype, expected_dtype):
    """
    Check that _to_numpy converts a pandas.Series of datetimes as expected.

    The converted array must have ``expected_dtype`` and hold the same values as
    the input series once any timezone-aware series has been converted to UTC and
    stripped of its timezone information.
    """
    timestamps = [
        pd.Timestamp("2024-01-02T03:04:05"),
        pd.Timestamp("2024-01-02T03:04:06"),
    ]
    series = pd.Series(timestamps, dtype=dtype)

    result = _to_numpy(series)
    _check_result(result, np.datetime64)
    assert result.dtype == expected_dtype

    # A hacky way to decide if the dtype is timezone-aware: only the
    # timezone-aware string aliases used here contain a comma.
    is_tz_aware = "," in str(dtype)
    if is_tz_aware:
        # Convert to UTC if the dtype is timezone-aware.
        # TODO(pandas>=2.1): Simplify the if-else statement.
        is_old_pandas = Version(pd.__version__) < Version("2.1")
        if is_old_pandas and dtype.startswith("timestamp"):
            # pandas 2.0 doesn't have the dt.tz_convert method for pyarrow.Timestamp.
            series = pd.to_datetime(series, utc=True)
        else:
            series = series.dt.tz_convert("UTC")

    # Remove time zone information and preserve local time.
    naive_series = series.dt.tz_localize(tz=None)
    expected = np.array(naive_series, dtype=expected_dtype)
    npt.assert_array_equal(result, expected)


########################################################################################
# Test the _to_numpy function with PyArrow arrays.
#
Expand Down
Loading