diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index 338dd87aa8c62..1144c767942d4 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -27,6 +27,7 @@ Exceptions and warnings errors.AbstractMethodError errors.AccessorRegistrationWarning errors.AttributeConflictWarning + errors.CategoricalConversionWarning errors.ClosedFileError errors.CSSWarning errors.DatabaseError @@ -36,6 +37,7 @@ Exceptions and warnings errors.EmptyDataError errors.IncompatibilityWarning errors.IndexingError + errors.InvalidColumnName errors.InvalidIndexError errors.IntCastingNaNError errors.MergeError @@ -49,6 +51,7 @@ Exceptions and warnings errors.ParserWarning errors.PerformanceWarning errors.PossibleDataLossError + errors.PossiblePrecisionLoss errors.PyperclipException errors.PyperclipWindowsException errors.SettingWithCopyError @@ -57,6 +60,7 @@ Exceptions and warnings errors.UndefinedVariableError errors.UnsortedIndexError errors.UnsupportedFunctionCall + errors.ValueLabelTypeMismatch Bug report function ------------------- diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 578431e9ab2d8..3788cb9867dbb 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -275,7 +275,7 @@ Other enhancements - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`) - Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`) - ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`) -- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, and 
:class:`.IndexingError` are now exposed in ``pandas.errors`` (:issue:`27656`) +- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`) - Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`) - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files) - :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index d7f5e7aab58ab..d0c9ef94f4453 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -479,10 +479,67 @@ class DatabaseError(OSError): """ +class PossiblePrecisionLoss(Warning): + """ + Warning raised by to_stata on a column with a value outside or equal to int64. + + When the column value is outside or equal to the int64 value the column is + converted to a float64 dtype. + + Examples + -------- + >>> df = pd.DataFrame({"s": pd.Series([1, 2**53], dtype=np.int64)}) + >>> df.to_stata('test') # doctest: +SKIP + ... # PossiblePrecisionLoss: Column converted from int64 to float64... + """ + + +class ValueLabelTypeMismatch(Warning): + """ + Warning raised by to_stata on a category column that contains non-string values. 
+ + Examples + -------- + >>> df = pd.DataFrame({"categories": pd.Series(["a", 2], dtype="category")}) + >>> df.to_stata('test') # doctest: +SKIP + ... # ValueLabelTypeMismatch: Stata value labels (pandas categories) must be str... + """ + + +class InvalidColumnName(Warning): + """ + Warning raised by to_stata when a column name is not a valid Stata variable name. + + Because the column name is an invalid Stata variable name, the name needs to be + converted. + + Examples + -------- + >>> df = pd.DataFrame({"0categories": pd.Series([2, 2])}) + >>> df.to_stata('test') # doctest: +SKIP + ... # InvalidColumnName: Not all pandas column names were valid Stata variable... + """ + + +class CategoricalConversionWarning(Warning): + """ + Warning is raised when reading a partially labeled Stata file using an iterator. + + Examples + -------- + >>> from pandas.io.stata import StataReader + >>> with StataReader('dta_file', chunksize=2) as reader: # doctest: +SKIP + ... for i, block in enumerate(reader): + ... print(i, block) + ... # CategoricalConversionWarning: One or more series with value labels... 
+ """ + + __all__ = [ "AbstractMethodError", "AccessorRegistrationWarning", "AttributeConflictWarning", + "CategoricalConversionWarning", "ClosedFileError", "CSSWarning", "DatabaseError", @@ -492,6 +549,7 @@ class DatabaseError(OSError): "EmptyDataError", "IncompatibilityWarning", "IntCastingNaNError", + "InvalidColumnName", "InvalidIndexError", "IndexingError", "MergeError", @@ -505,6 +563,7 @@ class DatabaseError(OSError): "ParserWarning", "PerformanceWarning", "PossibleDataLossError", + "PossiblePrecisionLoss", "PyperclipException", "PyperclipWindowsException", "SettingWithCopyError", @@ -513,4 +572,5 @@ class DatabaseError(OSError): "UndefinedVariableError", "UnsortedIndexError", "UnsupportedFunctionCall", + "ValueLabelTypeMismatch", ] diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 3daa6d837349e..8305a4f6adf84 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -41,6 +41,12 @@ StorageOptions, WriteBuffer, ) +from pandas.errors import ( + CategoricalConversionWarning, + InvalidColumnName, + PossiblePrecisionLoss, + ValueLabelTypeMismatch, +) from pandas.util._decorators import ( Appender, doc, @@ -493,20 +499,12 @@ def g(x: datetime.datetime) -> int: """ -class PossiblePrecisionLoss(Warning): - pass - - precision_loss_doc: Final = """ Column converted from {0} to {1}, and some data are outside of the lossless conversion range. This may result in a loss of precision in the saved data. """ -class ValueLabelTypeMismatch(Warning): - pass - - value_label_mismatch_doc: Final = """ Stata value labels (pandas categories) must be strings. Column {0} contains non-string labels which will be converted to strings. Please check that the @@ -514,10 +512,6 @@ class ValueLabelTypeMismatch(Warning): """ -class InvalidColumnName(Warning): - pass - - invalid_name_doc: Final = """ Not all pandas column names were valid Stata variable names. 
The following replacements have been made: @@ -530,11 +524,7 @@ class InvalidColumnName(Warning): """ -class CategoricalConversionWarning(Warning): - pass - - -categorical_conversion_warning = """ +categorical_conversion_warning: Final = """ One or more series with value labels are not fully labeled. Reading this dataset with an iterator results in categorical variable with different categories. This occurs since it is not possible to know all possible values diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index 187d5399f5985..c6ca51b7763d9 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -35,6 +35,10 @@ "IncompatibilityWarning", "AttributeConflictWarning", "DatabaseError", + "PossiblePrecisionLoss", + "CategoricalConversionWarning", + "InvalidColumnName", + "ValueLabelTypeMismatch", ], ) def test_exception_importable(exc):