diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index eaee01eacb97b..077e9cef2a03e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -32,6 +32,7 @@ Other enhancements - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`) - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) - :func:`pandas.merge` now validates the ``how`` parameter input (merge type) (:issue:`59435`) +- :func:`read_spss` now accepts ``**kwargs`` that are passed through to :func:`pyreadstat.read_sav` (:issue:`56356`) - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) - :meth:`DataFrame.agg` called with ``axis=1`` and a ``func`` which relabels the result index now raises a ``NotImplementedError`` (:issue:`58807`). - :meth:`Index.get_loc` now accepts also subclasses of ``tuple`` as keys (:issue:`57922`) diff --git a/pandas/io/spss.py b/pandas/io/spss.py index e597463aee453..dfada10c719c9 100644 --- a/pandas/io/spss.py +++ b/pandas/io/spss.py @@ -1,6 +1,9 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import ( + TYPE_CHECKING, + Any, +) from pandas._libs import lib from pandas.compat._optional import import_optional_dependency @@ -24,6 +27,7 @@ def read_spss( usecols: Sequence[str] | None = None, convert_categoricals: bool = True, dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, + **kwargs: Any, ) -> DataFrame: """ Load an SPSS file from the file path, returning a DataFrame. @@ -47,6 +51,10 @@ def read_spss( nullable :class:`ArrowDtype` :class:`DataFrame` .. versionadded:: 2.0 + **kwargs + Additional keyword arguments that can be passed to :func:`pyreadstat.read_sav`. + + .. 
versionadded:: 3.0 Returns ------- @@ -74,7 +82,10 @@ def read_spss( usecols = list(usecols) # pyreadstat requires a list df, metadata = pyreadstat.read_sav( - stringify_path(path), usecols=usecols, apply_value_formats=convert_categoricals + stringify_path(path), + usecols=usecols, + apply_value_formats=convert_categoricals, + **kwargs, ) df.attrs = metadata.__dict__ if dtype_backend is not lib.no_default: diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index e118c90d9bc02..1aa9f6dca0303 100644 --- a/pandas/tests/io/test_spss.py +++ b/pandas/tests/io/test_spss.py @@ -65,6 +65,22 @@ def test_spss_labelled_str(datapath): tm.assert_frame_equal(df, expected) +@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") +@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") +def test_spss_kwargs(datapath): + # test file from the Haven project (https://haven.tidyverse.org/) + # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT + fname = datapath("io", "data", "spss", "labelled-str.sav") + + df = pd.read_spss(fname, convert_categoricals=True, row_limit=1) + expected = pd.DataFrame({"gender": ["Male"]}, dtype="category") + tm.assert_frame_equal(df, expected) + + df = pd.read_spss(fname, convert_categoricals=False, row_offset=1) + expected = pd.DataFrame({"gender": ["F"]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") @pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_umlauts(datapath):