From 2f108cc487ec7ef96239c306e9d470eb4ba5184f Mon Sep 17 00:00:00 2001 From: Wolfgang Kerzendorf Date: Sat, 17 Oct 2020 10:37:49 +0200 Subject: [PATCH 1/9] add initial pandas HDF fileformat --- pytest_arraydiff/plugin.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pytest_arraydiff/plugin.py b/pytest_arraydiff/plugin.py index 8d54faf..38a9d1a 100755 --- a/pytest_arraydiff/plugin.py +++ b/pytest_arraydiff/plugin.py @@ -137,9 +137,41 @@ def write(filename, data, **kwargs): return np.savetxt(filename, data, **kwargs) +class PDHDFDiff(BaseDiff): + + extension = 'h5' + + @staticmethod + def read(filename): + import pandas as pd + return pd.read_hdf(filename) + + @staticmethod + def write(filename, data, **kwargs): + import pandas as pd + key = os.path.basename(filename).replace('.h5', '') + return data.to_hdf(filename, key, **kwargs) + + @classmethod + def compare(cls, reference_file, test_file, atol=None, rtol=None): + import pandas.testing as pdt + + + try: + pdt.assert_frame_equal(reference_file, test_file) + except AssertionError as exc: + message = "\n\na: {0}".format(test_file) + '\n' + message += "b: {0}".format(reference_file) + '\n' + message += exc.args[0] + return False, message + else: + return True, "" + + FORMATS = {} FORMATS['fits'] = FITSDiff FORMATS['text'] = TextDiff +FORMATS['pdhdf'] = PDHDFDiff def _download_file(url): From 081d236a0aaf395ce56448c786a66891a5c65cb5 Mon Sep 17 00:00:00 2001 From: Wolfgang Kerzendorf Date: Tue, 20 Oct 2020 00:03:57 +0200 Subject: [PATCH 2/9] add tests and fix comparer for pandas HDF --- README.rst | 4 +++- pytest_arraydiff/plugin.py | 6 ++++-- tests/test_pytest_arraydiff.py | 4 ++++ 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 3ac149a..39dd16b 100644 --- a/README.rst +++ b/README.rst @@ -15,7 +15,8 @@ in cases where the arrays are too large to conveniently hard-code them in the tests (e.g. ``np.testing.assert_allclose(x, [1, 2, 3])``). The basic idea is that you can write a test that generates a Numpy array (or -other related objects depending on the format). You can then either run the +other related objects depending on the format, e.g. pandas DataFrame). +You can then either run the tests in a mode to **generate** reference files from the arrays, or you can run the tests in **comparison** mode, which will compare the results of the tests to the reference ones within some tolerance. @@ -25,6 +26,7 @@ At the moment, the supported file formats for the reference files are: - A plain text-based format (based on Numpy ``loadtxt`` output) - The FITS format (requires `astropy `__). With this format, tests can return either a Numpy array for a FITS HDU object. +- A pandas HDF5 format using the pandas HDFStore For more information on how to write tests to do this, see the **Using** section below. diff --git a/pytest_arraydiff/plugin.py b/pytest_arraydiff/plugin.py index 38a9d1a..9aea776 100755 --- a/pytest_arraydiff/plugin.py +++ b/pytest_arraydiff/plugin.py @@ -155,10 +155,12 @@ def write(filename, data, **kwargs): @classmethod def compare(cls, reference_file, test_file, atol=None, rtol=None): import pandas.testing as pdt + import pandas as pd - + ref_data = pd.read_hdf(reference_file) + test_data = pd.read_hdf(test_file) try: - pdt.assert_frame_equal(reference_file, test_file) + pdt.assert_frame_equal(ref_data, test_data) except AssertionError as exc: message = "\n\na: {0}".format(test_file) + '\n' message += "b: {0}".format(reference_file) + '\n' diff --git a/tests/test_pytest_arraydiff.py b/tests/test_pytest_arraydiff.py index 9749b27..045c72a 100644 --- a/tests/test_pytest_arraydiff.py +++ b/tests/test_pytest_arraydiff.py @@ -17,6 +17,10 @@ def test_succeeds_func_default(): def test_succeeds_func_text(): return np.arange(3 * 5).reshape((3, 5)) +@pytest.mark.array_compare(file_format='pdhdf', reference_dir=reference_dir) +def test_succeeds_func_pdhdf(): + import pandas as pd + return pd.DataFrame(data=np.arange(20), columns=['test_data']) @pytest.mark.array_compare(file_format='fits', reference_dir=reference_dir) def test_succeeds_func_fits(): From 416288d75cfe8dc3378c87af9d56cc495ab2a003 Mon Sep 17 00:00:00 2001 From: Wolfgang Kerzendorf Date: Tue, 20 Oct 2020 00:04:39 +0200 Subject: [PATCH 3/9] add baseline for pandas HDF --- tests/baseline/test_succeeds_func_pdhdf.h5 | Bin 0 -> 7032 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/baseline/test_succeeds_func_pdhdf.h5 diff --git a/tests/baseline/test_succeeds_func_pdhdf.h5 b/tests/baseline/test_succeeds_func_pdhdf.h5 new file mode 100644 index 0000000000000000000000000000000000000000..7e17ccd397263814b1dfadc20c7994f0d913448f GIT binary patch literal 7032 zcmeHLOH&g;5T50^KzK`mdf~{$RG_qqqfvsP3JE5Gf>T_R1-5`qYIj4y%_AN>_zx&g z{sfO6Irvh|1__eLJTd%bmJB=-A7o6s#;tD#-cunFu zqo%PZ;2U;Lb*Z|n0j$RL_5mH*EVL}6$WoFs)zjsCrNVOEnuyD}8IhmO(hM?cUe`kX6w0q|R`XAm3zgbpsr)=&tzFcAE<4NpMsdvK{?wrW z%U4!-)&5iiBroF-m;T=t4|p;D(KE5xj+GdulAbt-CXU!+H2c>#5;k# z4{V6{X<*JNm6g(hp`{qEN^eChvQK81FDC3bXQ*3UCFk<|1i1lX5tN$~=P2G8+9HUH zEz50Kel0j^*=Q%k-FQ6R@~nMm$tdYJ9Pf8vd%n|jDP99K1TVSl?4#^lyk8&ov)y{L z;ketVO#dbItyLEv0Oq6k-AnyEujy$dgM3_%A9zkZi0kRbnYj&TA9RdpKoh1P0~^{W z$6Yk9A=HEWN^c|2v4bNm5Bhpz{XBcSy=QrPoM9S^CoJn;yaQU4`bH%D@6@A%=(zs% z>0dYV9UUDwg2|a(r%}vMCy38aZ)B~vpQV*maxLF>6_XjA3K?kcGuR^0dj`HHg<@|c7d^V4E+xsoZU!!-80nCd7 z&vN}%)3+O_MnSqtV^hK)dW1&$AY(PvuH$0Q^YU=W>XY&=T^}&-g%n1{UCEMu$(wgL zZ(F5mJx&h43>D?&v)9VYGpOS;D@yv4({r3RDCpGhQXrbVCtRGLzu?U21iFXirsn6- zdi;C5#P3m#8`#^n_s4iC@_k@~S0KWRi8y!hLBv0^u7baa_T&1$4)H=a4uh^`Ax=bI z@FV>1{rmbmzCp=F RivXF%1M))@fBw$-@e`|UE#LqE literal 0 HcmV?d00001 From e420cd169955bec3aca03e3ff14ed5c6cf21a939 Mon Sep 17 00:00:00 2001 From: Wolfgang Kerzendorf Date: Fri, 23 Oct 2020 10:40:08 -0400 Subject: [PATCH 4/9] modified to pd_hdf --- pytest_arraydiff/plugin.py | 2 +- tests/test_pytest_arraydiff.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pytest_arraydiff/plugin.py b/pytest_arraydiff/plugin.py index 9aea776..c80d008 100755 --- a/pytest_arraydiff/plugin.py +++ b/pytest_arraydiff/plugin.py @@ -173,7 +173,7 @@ def compare(cls, reference_file, test_file, atol=None, rtol=None): FORMATS = {} FORMATS['fits'] = FITSDiff FORMATS['text'] = TextDiff -FORMATS['pdhdf'] = PDHDFDiff +FORMATS['pd_hdf'] = PDHDFDiff def _download_file(url): diff --git a/tests/test_pytest_arraydiff.py b/tests/test_pytest_arraydiff.py index 045c72a..5501a18 100644 --- a/tests/test_pytest_arraydiff.py +++ b/tests/test_pytest_arraydiff.py @@ -17,7 +17,7 @@ def test_succeeds_func_default(): def test_succeeds_func_text(): return np.arange(3 * 5).reshape((3, 5)) -@pytest.mark.array_compare(file_format='pdhdf', reference_dir=reference_dir) +@pytest.mark.array_compare(file_format='pd_hdf', reference_dir=reference_dir) def test_succeeds_func_pdhdf(): import pandas as pd return pd.DataFrame(data=np.arange(20), columns=['test_data']) From 6f9573411b2185d39cf21d43f3e4da3caf24e998 Mon Sep 17 00:00:00 2001 From: Wolfgang Kerzendorf Date: Thu, 29 Oct 2020 10:34:35 -0400 Subject: [PATCH 5/9] add pandas dependency --- .travis.yml | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..037c4c6 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,75 @@ +# We set the language to c because python isn't supported on the MacOS X nodes +# on Travis. However, the language ends up being irrelevant anyway, since we +# install Python ourselves using conda. +language: c + +os: + - linux + +# Use Travis' container-based architecture +sudo: false + +notifications: + slack: + on_success: never + on_failure: always + secure: "TGQd3tbCDD4yz71iDgcwydUCJERx9MGOAHOtVc6SLMANHXmrXl1vIdAChr4P/1LPqLawjUC/9mqBZTImQj/Pg5PaIVnz7K0PN/fTVHqCWERZoeW5KheQ6Wg89a9B54aDTGzvNuRH83l3jQBYfa9ZuCv5mpeVOIzPcBFUrD+9DqA93DJc1NIfwlyJStyXS7aVHhDsD318GZtxY5ZL1pA0GWfde68vc5/Ng6QLqqTeogEZ0reVsXDAijjnOAQsnzYBsZH86vS3JldihAfMygY2rr9jURfpx0EQivCBeJsKtm9IVEh4Iy54TzxyS87KRW1DGjPrIIDzdq2BXfKF+zTiy9QUHzwijJCE4EusCHjaITEis9lq77g9pQMDXCCBhKsgs69lYSSxmhD8VahH0Ex5Pj5aLy3X71yIAA95Xb8q0YPPsjVHF4R2F91S4QMPxyfddym7JiBvNWOPZ2Jye+an+H5hgRZvaSClUUjIT7MDX3UkhzvMrAD4n7Yl64wcPdmj4GJD04aOq55pCfEw088GDL64HAYYnCpEK+hkeKSpzYFjJEwPgC5dLY7RNUwm0tuyRcnLwoRMYLVMT7TWoSfgPosv7kciIkS5c6lmqPRV11AcJARPP6+XIUJ333rkqdLQy59VtiBASQRSxsZKqewEaYv5u+8g+y2vmMLQKkFPW60=" + if: type = cron + +env: + global: + # The following versions are the 'default' for tests, unless + # overidden underneath. They are defined here in order to save having + # to repeat them for all configurations. + - TOXENV='test' + - TOXARGS='-v' + - TOXPOSARGS='' + - CONDA_DEPENDENCIES="pandas pytables" + +matrix: + include: + + - language: python + python: 3.6 + name: Python 3.6 and pytest 4.6 (Linux) + env: TOXENV=py36-test-pytest46 + + - os: windows + name: Python 3.6 and pytest 5.0 (Windows) + env: PYTHON_VERSION=3.6 + TOXENV=py36-test-pytest50 + + - os: osx + name: Python 3.7 and pytest 5.1 (MacOS X) + env: PYTHON_VERSION=3.7 + TOXENV=py37-test-pytest51 + + - language: python + python: 3.7 + name: Python 3.7 and pytest 5.2 (Linux) + env: TOXENV=py37-test-pytest52 + + - os: windows + name: Python 3.8 and pytest 5.3 (Windows) + env: PYTHON_VERSION=3.8 + TOXENV=py38-test-pytest53 + + - language: python + python: 3.8 + name: Python 3.8 and pytest 6.0 (Linux) + env: TOXENV=py38-test-pytest60 + + - os: osx + name: Python 3.8 and pytest dev (MacOS X) + env: PYTHON_VERSION=3.8 + TOXENV=py38-test-pytestdev + +install: + - if [[ $TRAVIS_OS_NAME == osx || $TRAVIS_OS_NAME == windows ]]; then + git clone git://github.com/astropy/ci-helpers.git; + source ci-helpers/travis/setup_python.sh; + fi + +script: + - pip install tox + - tox $TOXARGS -- $TOXPOSARGS From f0680919d4f5978814ec807b08b5f20b9db57dfc Mon Sep 17 00:00:00 2001 From: Wolfgang Kerzendorf Date: Mon, 2 Nov 2020 15:08:30 -0500 Subject: [PATCH 6/9] fix import or skip for pandas --- tests/test_pytest_arraydiff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pytest_arraydiff.py b/tests/test_pytest_arraydiff.py index 5501a18..3e17d68 100644 --- a/tests/test_pytest_arraydiff.py +++ b/tests/test_pytest_arraydiff.py @@ -19,7 +19,7 @@ def test_succeeds_func_text(): @pytest.mark.array_compare(file_format='pd_hdf', reference_dir=reference_dir) def test_succeeds_func_pdhdf(): - import pandas as pd + pd = pytest.importorskip('pandas') return pd.DataFrame(data=np.arange(20), columns=['test_data']) @pytest.mark.array_compare(file_format='fits', reference_dir=reference_dir) From bdfa7f3291684a1aea630f14ef995580fe039689 Mon Sep 17 00:00:00 2001 From: Wolfgang Kerzendorf Date: Tue, 3 Nov 2020 14:17:14 -0500 Subject: [PATCH 7/9] add tables pandas as dependency; remove from travis.yml --- .travis.yml | 1 - setup.cfg | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 037c4c6..4dcf0f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,6 @@ env: - TOXENV='test' - TOXARGS='-v' - TOXPOSARGS='' - - CONDA_DEPENDENCIES="pandas pytables" matrix: include: diff --git a/setup.cfg b/setup.cfg index 6a63809..f7b87c7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,6 +36,8 @@ install_requires = [options.extras_require] test = astropy + pandas + tables [options.entry_points] pytest11 = From 32becaf945da965566fe8c762b8911c684a8b50a Mon Sep 17 00:00:00 2001 From: Wolfgang Kerzendorf Date: Wed, 2 Dec 2020 14:01:24 -0500 Subject: [PATCH 8/9] fix windows trouble --- tests/test_pytest_arraydiff.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_pytest_arraydiff.py b/tests/test_pytest_arraydiff.py index 3e17d68..12f133f 100644 --- a/tests/test_pytest_arraydiff.py +++ b/tests/test_pytest_arraydiff.py @@ -17,10 +17,13 @@ def test_succeeds_func_default(): def test_succeeds_func_text(): return np.arange(3 * 5).reshape((3, 5)) + @pytest.mark.array_compare(file_format='pd_hdf', reference_dir=reference_dir) def test_succeeds_func_pdhdf(): pd = pytest.importorskip('pandas') - return pd.DataFrame(data=np.arange(20), columns=['test_data']) + return pd.DataFrame(data=np.arange(20, dtype='int64'), + columns=['test_data']) + @pytest.mark.array_compare(file_format='fits', reference_dir=reference_dir) def test_succeeds_func_fits(): From 1e12df430a72c99d94b55c5856aa0e97c0d306b2 Mon Sep 17 00:00:00 2001 From: Wolfgang Kerzendorf Date: Thu, 20 Jan 2022 19:00:11 +0100 Subject: [PATCH 9/9] add changelog message --- .travis.yml | 74 ----------------------------------------------------- CHANGES.md | 2 ++ 2 files changed, 2 insertions(+), 74 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 4dcf0f0..0000000 --- a/.travis.yml +++ /dev/null @@ -1,74 +0,0 @@ -# We set the language to c because python isn't supported on the MacOS X nodes -# on Travis. However, the language ends up being irrelevant anyway, since we -# install Python ourselves using conda. -language: c - -os: - - linux - -# Use Travis' container-based architecture -sudo: false - -notifications: - slack: - on_success: never - on_failure: always - secure: "TGQd3tbCDD4yz71iDgcwydUCJERx9MGOAHOtVc6SLMANHXmrXl1vIdAChr4P/1LPqLawjUC/9mqBZTImQj/Pg5PaIVnz7K0PN/fTVHqCWERZoeW5KheQ6Wg89a9B54aDTGzvNuRH83l3jQBYfa9ZuCv5mpeVOIzPcBFUrD+9DqA93DJc1NIfwlyJStyXS7aVHhDsD318GZtxY5ZL1pA0GWfde68vc5/Ng6QLqqTeogEZ0reVsXDAijjnOAQsnzYBsZH86vS3JldihAfMygY2rr9jURfpx0EQivCBeJsKtm9IVEh4Iy54TzxyS87KRW1DGjPrIIDzdq2BXfKF+zTiy9QUHzwijJCE4EusCHjaITEis9lq77g9pQMDXCCBhKsgs69lYSSxmhD8VahH0Ex5Pj5aLy3X71yIAA95Xb8q0YPPsjVHF4R2F91S4QMPxyfddym7JiBvNWOPZ2Jye+an+H5hgRZvaSClUUjIT7MDX3UkhzvMrAD4n7Yl64wcPdmj4GJD04aOq55pCfEw088GDL64HAYYnCpEK+hkeKSpzYFjJEwPgC5dLY7RNUwm0tuyRcnLwoRMYLVMT7TWoSfgPosv7kciIkS5c6lmqPRV11AcJARPP6+XIUJ333rkqdLQy59VtiBASQRSxsZKqewEaYv5u+8g+y2vmMLQKkFPW60=" - if: type = cron - -env: - global: - # The following versions are the 'default' for tests, unless - # overidden underneath. They are defined here in order to save having - # to repeat them for all configurations. - - TOXENV='test' - - TOXARGS='-v' - - TOXPOSARGS='' - -matrix: - include: - - - language: python - python: 3.6 - name: Python 3.6 and pytest 4.6 (Linux) - env: TOXENV=py36-test-pytest46 - - - os: windows - name: Python 3.6 and pytest 5.0 (Windows) - env: PYTHON_VERSION=3.6 - TOXENV=py36-test-pytest50 - - - os: osx - name: Python 3.7 and pytest 5.1 (MacOS X) - env: PYTHON_VERSION=3.7 - TOXENV=py37-test-pytest51 - - - language: python - python: 3.7 - name: Python 3.7 and pytest 5.2 (Linux) - env: TOXENV=py37-test-pytest52 - - - os: windows - name: Python 3.8 and pytest 5.3 (Windows) - env: PYTHON_VERSION=3.8 - TOXENV=py38-test-pytest53 - - - language: python - python: 3.8 - name: Python 3.8 and pytest 6.0 (Linux) - env: TOXENV=py38-test-pytest60 - - - os: osx - name: Python 3.8 and pytest dev (MacOS X) - env: PYTHON_VERSION=3.8 - TOXENV=py38-test-pytestdev - -install: - - if [[ $TRAVIS_OS_NAME == osx || $TRAVIS_OS_NAME == windows ]]; then - git clone git://github.com/astropy/ci-helpers.git; - source ci-helpers/travis/setup_python.sh; - fi - -script: - - pip install tox - - tox $TOXARGS -- $TOXPOSARGS diff --git a/CHANGES.md b/CHANGES.md index 6c979cd..eee5e7f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,6 +1,8 @@ 0.6 (unreleased) ---------------- +- Add ability to compare to Pandas DataFrames and store them as HDF5 files [#23] + 0.5 (2022-01-12) ----------------