diff --git a/CHANGES.md b/CHANGES.md index 6c979cd..eee5e7f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,8 @@ 0.6 (unreleased) ---------------- +- Add ability to compare pandas DataFrames, storing the reference data as HDF5 files [#23] + 0.5 (2022-01-12) ---------------- diff --git a/README.rst b/README.rst index 3ac149a..39dd16b 100644 --- a/README.rst +++ b/README.rst @@ -15,7 +15,8 @@ in cases where the arrays are too large to conveniently hard-code them in the tests (e.g. ``np.testing.assert_allclose(x, [1, 2, 3])``). The basic idea is that you can write a test that generates a Numpy array (or -other related objects depending on the format). You can then either run the +other related objects depending on the format, e.g. a pandas DataFrame). +You can then either run the tests in a mode to **generate** reference files from the arrays, or you can run the tests in **comparison** mode, which will compare the results of the tests to the reference ones within some tolerance. @@ -25,6 +26,7 @@ At the moment, the supported file formats for the reference files are: - A plain text-based format (based on Numpy ``loadtxt`` output) - The FITS format (requires `astropy `__). With this format, tests can return either a Numpy array for a FITS HDU object. +- A pandas HDF5 format using the pandas HDFStore (requires ``pandas`` and ``tables``). With this format, tests return a pandas DataFrame. For more information on how to write tests to do this, see the **Using** section below. 
diff --git a/pytest_arraydiff/plugin.py b/pytest_arraydiff/plugin.py
index 8d54faf..c80d008 100755
--- a/pytest_arraydiff/plugin.py
+++ b/pytest_arraydiff/plugin.py
@@ -137,9 +137,67 @@ def write(filename, data, **kwargs):
         return np.savetxt(filename, data, **kwargs)
 
 
+class PDHDFDiff(BaseDiff):
+    """Reference-file format that stores a pandas DataFrame as HDF5.
+
+    Files are written with ``DataFrame.to_hdf`` and loaded with
+    ``pandas.read_hdf``. pandas is imported inside the methods rather
+    than at module level, so it is only needed when this format is used.
+    """
+
+    extension = 'h5'
+
+    @staticmethod
+    def read(filename):
+        """Load the pandas object stored in the HDF5 file ``filename``."""
+        import pandas as pd
+        return pd.read_hdf(filename)
+
+    @staticmethod
+    def write(filename, data, **kwargs):
+        """Write ``data`` to ``filename``, passing ``kwargs`` to ``to_hdf``.
+
+        The HDF5 key is the base name of ``filename`` without extension.
+        """
+        # splitext strips only the trailing extension; str.replace would
+        # also delete a '.h5' occurring in the middle of the name.
+        key = os.path.splitext(os.path.basename(filename))[0]
+        # to_hdf opens the file in append mode by default; start from an
+        # empty file so regenerating a reference leaves no stale datasets.
+        kwargs.setdefault('mode', 'w')
+        # Passing ``key`` positionally is deprecated in pandas >= 2.1.
+        return data.to_hdf(filename, key=key, **kwargs)
+
+    @classmethod
+    def compare(cls, reference_file, test_file, atol=None, rtol=None):
+        """Compare the DataFrames stored in two HDF5 files.
+
+        Returns ``(True, "")`` when they match, else ``(False, message)``.
+        ``atol`` and ``rtol`` are forwarded to ``assert_frame_equal`` when
+        not ``None``; otherwise the pandas default tolerances apply.
+        """
+        import pandas.testing as pdt
+
+        ref_data = cls.read(reference_file)
+        test_data = cls.read(test_file)
+        tolerances = {name: value
+                      for name, value in (('atol', atol), ('rtol', rtol))
+                      if value is not None}
+        try:
+            pdt.assert_frame_equal(ref_data, test_data, **tolerances)
+        except AssertionError as exc:
+            message = "\n\na: {0}".format(test_file) + '\n'
+            message += "b: {0}".format(reference_file) + '\n'
+            message += exc.args[0]
+            return False, message
+        else:
+            return True, ""
+
+
 FORMATS = {}
 FORMATS['fits'] = FITSDiff
 FORMATS['text'] = TextDiff
+FORMATS['pd_hdf'] = PDHDFDiff
 
 
 def _download_file(url):
diff --git a/setup.cfg b/setup.cfg
index 6a63809..f7b87c7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -36,6 +36,8 @@ install_requires =
 [options.extras_require]
 test =
     astropy
+    pandas
+    tables
 
 [options.entry_points]
 pytest11 =
diff --git a/tests/baseline/test_succeeds_func_pdhdf.h5 b/tests/baseline/test_succeeds_func_pdhdf.h5
new file mode 100644
index 0000000..7e17ccd
Binary files /dev/null and b/tests/baseline/test_succeeds_func_pdhdf.h5 differ
diff --git a/tests/test_pytest_arraydiff.py b/tests/test_pytest_arraydiff.py
index 9749b27..12f133f 100644
--- a/tests/test_pytest_arraydiff.py
+++ b/tests/test_pytest_arraydiff.py
@@ -18,6 +18,13 @@ def test_succeeds_func_text():
     return np.arange(3 * 5).reshape((3, 5))
 
 
+@pytest.mark.array_compare(file_format='pd_hdf', reference_dir=reference_dir)
+def test_succeeds_func_pdhdf():
+    pd = pytest.importorskip('pandas')
+    pytest.importorskip('tables')  # PyTables backs pandas' HDF5 I/O
+    return pd.DataFrame({'test_data': np.arange(20, dtype='int64')})
+
+
 @pytest.mark.array_compare(file_format='fits', reference_dir=reference_dir)
 def test_succeeds_func_fits():
     return np.arange(3 * 5).reshape((3, 5)).astype(np.int64)