Skip to content
Merged
11 changes: 11 additions & 0 deletions pygmt/clib/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,17 @@
numpy_dtype = np.float64
data = data.to_numpy(na_value=np.nan)

# Deal with timezone-aware datetime dtypes.
if isinstance(dtype, pd.DatetimeTZDtype): # pandas.DatetimeTZDtype
numpy_dtype = getattr(dtype, "base", None)
elif isinstance(dtype, pd.ArrowDtype) and hasattr(dtype.pyarrow_dtype, "tz"):
# pd.ArrowDtype[pa.Timestamp]
numpy_dtype = getattr(dtype, "numpy_dtype", None)
# TODO(pandas>=2.1): Remove the workaround for pandas<2.1.
if Version(pd.__version__) < Version("2.1"):
# In pandas 2.0, dtype.numpy_type is dtype("O").
numpy_dtype = np.dtype(f"M8[{dtype.pyarrow_dtype.unit}]") # type: ignore[assignment, attr-defined]

Check warning on line 204 in pygmt/clib/conversion.py

View check run for this annotation

Codecov / codecov/patch

pygmt/clib/conversion.py#L204

Added line #L204 was not covered by tests
Comment on lines +202 to +204
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a TODO here to remove this once we drop support for pandas 2.0? Should be after 2025-08-29 according to https://scientific-python.org/specs/spec-0000/#support-window


array = np.ascontiguousarray(data, dtype=numpy_dtype)

# Check if a np.object_ array can be converted to np.str_.
Expand Down
107 changes: 107 additions & 0 deletions pygmt/tests/test_clib_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,113 @@ def test_to_numpy_pandas_date(dtype, expected_dtype):
)


pandas_old_version = pytest.mark.xfail(
condition=Version(pd.__version__) < Version("2.1"),
reason="pandas 2.0 bug reported in https://github.com/pandas-dev/pandas/issues/52705",
)


@pytest.mark.parametrize(
("dtype", "expected_dtype"),
[
# NumPy datetime64 types. Only unit 's'/'ms'/'us'/'ns' are supported.
pytest.param("datetime64[s]", "datetime64[s]", id="datetime64[s]"),
pytest.param("datetime64[ms]", "datetime64[ms]", id="datetime64[ms]"),
pytest.param("datetime64[us]", "datetime64[us]", id="datetime64[us]"),
pytest.param("datetime64[ns]", "datetime64[ns]", id="datetime64[ns]"),
# pandas.DatetimeTZDtype can be given in two ways [tz is required]:
# 1. pandas.DatetimeTZDtype(unit, tz)
# 2. String aliases: "datetime64[unit, tz]"
pytest.param(
"datetime64[s, UTC]",
"datetime64[s]",
id="datetime64[s, tz=UTC]",
marks=pandas_old_version,
),
pytest.param(
"datetime64[s, America/New_York]",
"datetime64[s]",
id="datetime64[s, tz=America/New_York]",
marks=pandas_old_version,
),
pytest.param(
"datetime64[s, +07:30]",
"datetime64[s]",
id="datetime64[s, +07:30]",
marks=pandas_old_version,
),
# PyArrow timestamp types can be given in two ways [tz is optional]:
# 1. pd.ArrowDtype(pyarrow.Timestamp(unit, tz=tz))
# 2. String aliases: "timestamp[unit, tz][pyarrow]"
pytest.param(
"timestamp[s][pyarrow]",
"datetime64[s]",
id="timestamp[s][pyarrow]",
marks=skip_if_no(package="pyarrow"),
),
pytest.param(
"timestamp[ms][pyarrow]",
"datetime64[ms]",
id="timestamp[ms][pyarrow]",
marks=[skip_if_no(package="pyarrow"), pandas_old_version],
),
pytest.param(
"timestamp[us][pyarrow]",
"datetime64[us]",
id="timestamp[us][pyarrow]",
marks=[skip_if_no(package="pyarrow"), pandas_old_version],
),
pytest.param(
"timestamp[ns][pyarrow]",
"datetime64[ns]",
id="timestamp[ns][pyarrow]",
marks=skip_if_no(package="pyarrow"),
),
pytest.param(
"timestamp[s, UTC][pyarrow]",
"datetime64[s]",
id="timestamp[s, UTC][pyarrow]",
marks=skip_if_no(package="pyarrow"),
),
pytest.param(
"timestamp[s, America/New_York][pyarrow]",
"datetime64[s]",
id="timestamp[s, America/New_York][pyarrow]",
marks=skip_if_no(package="pyarrow"),
),
pytest.param(
"timestamp[s, +08:00][pyarrow]",
"datetime64[s]",
id="timestamp[s, +08:00][pyarrow]",
marks=skip_if_no(package="pyarrow"),
),
],
)
def test_to_numpy_pandas_datetime(dtype, expected_dtype):
"""
Test the _to_numpy function with pandas.Series of datetime types.
"""
series = pd.Series(
[pd.Timestamp("2024-01-02T03:04:05"), pd.Timestamp("2024-01-02T03:04:06")],
dtype=dtype,
)
result = _to_numpy(series)
_check_result(result, np.datetime64)
assert result.dtype == expected_dtype

# Convert to UTC if the dtype is timezone-aware
if "," in str(dtype): # A hacky way to decide if the dtype is timezone-aware.
# TODO(pandas>=2.1): Simplify the if-else statement.
if Version(pd.__version__) < Version("2.1") and dtype.startswith("timestamp"):
# pandas 2.0 doesn't have the dt.tz_convert method for pyarrow.Timestamp.
series = pd.to_datetime(series, utc=True)
else:
series = series.dt.tz_convert("UTC")
# Remove time zone information and preserve local time.
expected_series = series.dt.tz_localize(tz=None)
npt.assert_array_equal(result, np.array(expected_series, dtype=expected_dtype))


########################################################################################
# Test the _to_numpy function with PyArrow arrays.
#
Expand Down
Loading